// 文档 https://github.com/hooke007/MPV_lazy/wiki/4_GLSL

// CuNNy 3x12 DS
// Copyright (c) 2024 funnyplanter

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3.0 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program.  If not, see <https://www.gnu.org/licenses/>.
/* ------------------------------------------------------------------- */


//!DESC [CuNNy_3x12_DS] -in
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND LUMA
//!SAVE in
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Request 16-bit float arithmetic. When the extension macro is defined the
// aliases below map to half-precision types; otherwise they fall back to the
// standard 32-bit float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0(x, y): red component of the LUMA texel at pos + (x, y), scaled by
// LUMA_mul and converted to F. The coordinate is clamped to [0, sz], so reads
// outside the texture repeat the nearest edge texel. `pos` and `sz` are taken
// from the scope where the macro is expanded (hook()). The trailing
// "* ivec2(1, 1) + ivec2(0, 0)" leaves the coordinate unchanged.
#define l0(x, y) F((LUMA_mul * texelFetch(LUMA_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(1, 1) + ivec2(0, 0), 0)).r)
// Workgroup-shared tile of input samples, 10x10 entries in a single plane.
// hook() fills it with the 8x8 pixels handled by the workgroup plus a
// one-pixel border on every side.
shared F G[1][10][10];
// Entry point of the "-in" pass.
// Each invocation handles one LUMA pixel: it forms three 4-component weighted
// sums of the pixel's 3x3 neighbourhood, adds a constant offset to each,
// clamps the results to [0, 1] and stores them in three horizontally adjacent
// texels of out_image.
void hook() {
	// xy: position inside the 8x8 workgroup; pos: source pixel coordinate;
	// opos: first of the three output texels written for this pixel.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid LUMA coordinate; used by l0() to clamp fetches.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative load of the 10x10 tile. Every invocation writes the entry at
	// (xy.x, xy.y) and, where the index is still below 10, the entries 8
	// further along each axis. G[0][ay][ax] receives the texel at
	// workgroup origin + (ax - 1, ay - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
		}
	}
	// All invocations of the workgroup must have reached this point (tile
	// written) before any of them reads the tile below.
	barrier();
	F s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	// s0_R_C is the tile entry at row xy.y + R, column xy.x + C, i.e. the 3x3
	// neighbourhood of this pixel with s0_1_1 at the centre.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2];
	// Accumulate: for every neighbour, one constant weight vector per
	// accumulator (r0, r1, r2) scaled by that neighbour's sample value.
	r0 += V4(1.678e-01, 1.604e-02, -1.335e-01, -1.412e-01) * s0_0_0;
	r1 += V4(3.048e-03, 2.490e-01, 7.037e-01, 1.393e-01) * s0_0_0;
	r2 += V4(-2.513e-02, 5.565e-02, -4.521e-01, -1.128e-01) * s0_0_0;
	r0 += V4(-2.459e-01, -1.631e-01, -7.604e-01, 1.907e-01) * s0_0_1;
	r1 += V4(9.485e-01, 1.525e-01, 6.440e-02, 4.822e-01) * s0_0_1;
	r2 += V4(2.451e-03, 9.345e-01, -2.703e-01, 1.168e-01) * s0_0_1;
	r0 += V4(4.524e-02, 1.343e-02, 5.090e-02, -3.698e-02) * s0_0_2;
	r1 += V4(1.596e-03, 2.291e-02, -1.665e-02, 2.847e-01) * s0_0_2;
	r2 += V4(-1.470e-02, 6.811e-02, 6.616e-03, -3.255e-02) * s0_0_2;
	r0 += V4(7.864e-01, 3.347e-01, 9.085e-01, 4.008e-02) * s0_1_0;
	r1 += V4(-2.224e-02, -8.055e-02, 1.450e-01, -1.462e-02) * s0_1_0;
	r2 += V4(1.284e-01, -3.005e-01, -2.764e-01, -8.187e-01) * s0_1_0;
	r0 += V4(-6.699e-01, -2.131e-01, -2.456e-02, -3.329e-01) * s0_1_1;
	r1 += V4(-9.199e-01, -9.877e-01, -8.485e-01, -9.486e-01) * s0_1_1;
	r2 += V4(3.352e-01, -5.996e-01, 9.794e-01, 8.613e-01) * s0_1_1;
	r0 += V4(-8.619e-02, -6.088e-03, -4.200e-02, 8.447e-02) * s0_1_2;
	r1 += V4(-1.112e-02, 1.299e-01, -4.221e-02, 1.054e-02) * s0_1_2;
	r2 += V4(-1.845e-01, -1.158e-01, 2.289e-02, 1.642e-02) * s0_1_2;
	r0 += V4(7.534e-02, -3.221e-02, -5.311e-02, -2.348e-03) * s0_2_0;
	r1 += V4(2.002e-02, 3.208e-02, -2.445e-02, 1.234e-01) * s0_2_0;
	r2 += V4(-5.716e-02, 1.075e-02, 3.455e-02, -7.073e-02) * s0_2_0;
	r0 += V4(-6.689e-02, 6.796e-02, 6.740e-02, 1.433e-02) * s0_2_1;
	r1 += V4(-2.959e-02, -3.576e-02, -4.847e-02, -1.231e-01) * s0_2_1;
	r2 += V4(-3.316e-02, -5.719e-02, 8.220e-03, 2.205e-02) * s0_2_1;
	r0 += V4(-9.039e-03, 6.588e-03, -1.251e-02, -1.546e-02) * s0_2_2;
	r1 += V4(1.093e-02, 1.360e-01, 6.161e-02, 1.319e-02) * s0_2_2;
	r2 += V4(-8.948e-02, 5.146e-03, -5.310e-02, 1.393e-02) * s0_2_2;
	// Per accumulator: add its constant offset, clamp to [0, 1], and store it
	// in output texel opos + (k, 0) for k = 0, 1, 2.
	r0 += V4(2.032e-02, 5.188e-02, -2.736e-04, 2.116e-01);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-1.868e-03, 2.315e-03, -2.456e-03, -1.397e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(3.198e-03, 2.130e-02, -7.639e-03, 2.031e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_3x12_DS] -conv1
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND in
//!BIND LUMA
//!SAVE conv1
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Request 16-bit float arithmetic. When the extension macro is defined the
// aliases below map to half-precision types; otherwise they fall back to the
// standard 32-bit float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): fetch one 4-component texel of the `in` texture for the
// pixel at pos + (x, y), scaled by in_mul and converted to V4. The pixel
// coordinate is clamped to [0, sz] first, then its x is multiplied by 3 and
// offset by 0, 1 or 2, so lK selects the K-th of three horizontally adjacent
// texels belonging to that pixel. `pos` and `sz` are taken from the scope
// where the macro is expanded (hook()).
#define l0(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tile: three planes (one per l0/l1/l2) of 10x10 V4 entries.
// hook() fills it with the 8x8 pixels handled by the workgroup plus a
// one-pixel border on every side.
shared V4 G[3][10][10];
// Entry point of the "-conv1" pass.
// Each invocation handles one pixel: it reads the 3x3 neighbourhood from the
// three planes of the `in` texture (4 components each), multiplies every
// neighbour vector by a constant 4x4 matrix per accumulator, sums the results
// into r0/r1/r2, adds a constant offset, clamps to [0, 1] and stores the three
// vectors in three horizontally adjacent texels of out_image.
void hook() {
	// xy: position inside the 8x8 workgroup; pos: pixel coordinate in LUMA
	// units; opos: first of the three output texels written for this pixel.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid pixel coordinate (LUMA units); used by l0/l1/l2 to clamp
	// before the x coordinate is scaled by 3.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative load of the 10x10 tile for all three planes. Every invocation
	// writes the entry at (xy.x, xy.y) and, where the index is still below 10,
	// the entries 8 further along each axis. G[k][ay][ax] receives plane k of
	// the pixel at workgroup origin + (ax - 1, ay - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// All invocations of the workgroup must have reached this point (tile
	// written) before any of them reads the tile below.
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	// sP_R_C is the tile entry at row xy.y + R, column xy.x + C, i.e. the 3x3
	// neighbourhood of this pixel with sP_1_1 at the centre. s0_* is loaded
	// from plane 0 and s1_* from plane 1.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 contributions: for every neighbour, one constant 4x4 matrix per
	// accumulator multiplied by that neighbour's 4-component vector.
	r0 += M4(3.206e-03, 5.035e-03, -8.861e-02, 3.447e-01, -2.484e-01, 1.050e-01, 1.178e-02, -4.327e-01, 1.725e-02, -4.456e-02, 2.293e-02, -3.335e-03, -1.684e-01, 1.254e-03, 1.042e-01, -1.601e-01) * s0_0_0;
	r1 += M4(-3.096e-01, 7.997e-02, 1.870e-01, 2.882e-02, 3.027e-02, 5.456e-02, 5.631e-01, 1.168e-01, -1.042e-01, -6.091e-02, -5.595e-02, -2.071e-01, -1.222e-01, -6.772e-03, 4.579e-01, 2.446e-01) * s0_0_0;
	r2 += M4(4.712e-01, 1.291e-01, 2.921e-01, 2.216e-01, 3.414e-01, -4.338e-02, -8.894e-02, -4.331e-01, -4.511e-01, -1.020e-03, -9.289e-02, 5.321e-02, -1.842e-01, 3.388e-01, 1.288e-01, -1.283e-01) * s0_0_0;
	r0 += M4(1.018e-02, -1.988e-01, -8.273e-02, 2.055e-01, 1.225e-01, 5.865e-01, -1.058e-01, 5.682e-01, -8.402e-02, 1.489e-01, 1.387e-01, -1.763e-01, -3.642e-01, 8.374e-01, 1.752e-01, 6.340e-02) * s0_0_1;
	r1 += M4(-1.000e+00, -6.553e-02, 1.039e-01, -5.241e-01, -9.048e-02, 2.280e-01, 1.332e-01, 2.854e-01, 1.200e-01, -9.731e-02, 1.481e-01, -4.093e-01, -2.847e-01, -1.692e-01, 2.037e-01, -5.034e-01) * s0_0_1;
	r2 += M4(6.327e-01, 3.413e-01, 3.055e-01, 5.695e-02, 6.254e-01, 5.114e-02, 2.520e-01, 1.311e-01, -4.139e-01, -1.185e-01, -1.616e-01, -3.462e-02, -2.270e-03, 3.408e-01, -2.796e-01, -1.348e-01) * s0_0_1;
	r0 += M4(-1.767e-01, 4.346e-01, 1.657e-01, 1.142e-01, 1.919e-01, -2.980e-01, -6.115e-02, -1.272e-01, 4.717e-02, -2.226e-02, -7.251e-02, -1.423e-03, -1.932e-01, 3.450e-01, -7.140e-02, 1.068e-01) * s0_0_2;
	r1 += M4(-3.250e-02, -1.074e-01, -1.585e-01, -1.898e-01, -8.752e-02, 5.090e-01, 1.681e-01, 5.408e-01, 3.698e-02, -1.401e-01, 2.252e-01, -2.839e-01, 5.617e-02, -3.209e-01, -7.758e-03, 1.069e-02) * s0_0_2;
	r2 += M4(3.105e-01, 2.827e-01, 1.269e-01, -2.076e-01, -1.060e-01, 1.530e-01, -1.104e-01, 2.667e-01, -1.890e-01, -4.809e-02, 8.907e-02, 9.254e-02, 2.282e-01, -1.035e-01, 8.997e-02, 1.188e-01) * s0_0_2;
	r0 += M4(-2.987e-02, 1.599e-01, -5.683e-02, -5.613e-01, -1.886e-01, -5.465e-01, -1.528e-01, -2.593e-01, -2.325e-02, 9.169e-02, 1.743e-01, 2.287e-01, 1.403e-01, 1.043e-01, 1.390e-01, 8.361e-01) * s0_1_0;
	r1 += M4(2.251e-01, -3.378e-02, 3.295e-02, -1.948e-01, -4.696e-01, -2.959e-01, -4.676e-01, 9.134e-02, -1.920e-01, 1.660e-01, -1.030e-01, -7.322e-02, 2.721e-02, 7.339e-02, -3.376e-01, -1.194e-01) * s0_1_0;
	r2 += M4(1.328e-01, 1.259e-01, 1.767e-01, 3.755e-01, -4.132e-02, -3.973e-01, -2.670e-01, 9.996e-01, -1.516e-01, 1.918e-02, 7.997e-02, 1.926e-02, 6.672e-01, -1.494e-02, -5.538e-02, 2.229e-01) * s0_1_0;
	r0 += M4(-1.184e-02, 5.837e-01, -1.082e-01, -1.000e+00, 1.366e-01, -9.325e-02, -1.608e-01, -7.328e-01, 3.368e-01, -2.818e-01, 4.556e-02, -2.015e-01, 7.480e-01, -1.000e+00, -3.188e-01, -7.099e-01) * s0_1_1;
	r1 += M4(9.455e-01, 1.019e-01, 5.073e-01, -2.291e-01, 7.168e-01, 5.408e-01, 1.395e-01, 4.553e-02, -5.791e-01, -6.061e-03, 1.239e-01, -3.954e-01, 3.172e-01, 6.813e-01, -9.063e-01, 1.932e-01) * s0_1_1;
	r2 += M4(-3.409e-01, -7.106e-01, -2.296e-01, 8.675e-01, -4.941e-01, -3.729e-01, 5.871e-02, -3.794e-01, 3.211e-01, -1.734e-01, -2.894e-01, -4.976e-01, -9.475e-01, -9.827e-01, 6.369e-01, 1.766e-01) * s0_1_1;
	r0 += M4(3.273e-01, -4.300e-01, -9.149e-02, -7.231e-01, -2.208e-01, 1.620e-01, 4.722e-01, 6.047e-01, -2.345e-01, 1.184e-01, -1.203e-01, 1.397e-01, -1.120e-01, -2.309e-01, 2.284e-01, -6.201e-03) * s0_1_2;
	r1 += M4(-5.750e-02, 3.885e-01, -9.166e-02, 5.801e-02, 1.266e-01, -3.421e-01, -5.463e-01, 4.743e-02, 3.135e-01, -1.028e-01, 2.299e-01, -3.972e-01, 2.846e-02, -4.568e-02, 2.050e-01, -5.779e-02) * s0_1_2;
	r2 += M4(-4.791e-01, -5.449e-01, -3.685e-01, -2.693e-01, -2.543e-01, -2.737e-01, 2.103e-01, 2.251e-01, -2.886e-01, 1.277e-01, -4.451e-01, 1.239e-01, 1.944e-01, 4.311e-01, -1.532e-01, -1.165e-01) * s0_1_2;
	r0 += M4(1.413e-01, -1.078e-01, 1.588e-02, 1.928e-01, -2.313e-02, 1.116e-01, 4.723e-02, 4.327e-02, -1.452e-01, 6.611e-03, 1.501e-01, -3.232e-02, -4.373e-02, -8.928e-02, 1.856e-01, 8.262e-02) * s0_2_0;
	r1 += M4(-1.013e-01, 1.259e-02, -1.785e-01, -3.405e-01, 2.017e-01, -4.443e-01, 6.497e-01, -3.338e-02, -1.197e-01, 2.692e-01, -5.977e-01, -1.876e-01, 5.563e-02, -3.597e-03, 4.120e-01, 1.843e-01) * s0_2_0;
	r2 += M4(-1.519e-01, -6.484e-03, -1.632e-01, 1.050e-01, -9.998e-02, -7.054e-01, -1.205e-01, -2.775e-01, -1.118e-02, 3.252e-01, 1.929e-01, -1.151e-01, 6.343e-02, 1.328e-01, -2.019e-02, 2.524e-01) * s0_2_0;
	r0 += M4(4.703e-01, -7.446e-02, 1.217e-01, -2.126e-01, 4.115e-01, 6.116e-02, -2.068e-01, 2.552e-01, -1.569e-01, 4.473e-02, 4.070e-01, 1.184e-01, -2.636e-01, 1.274e-01, -5.309e-01, -2.411e-02) * s0_2_1;
	r1 += M4(2.885e-01, 5.887e-02, -1.869e-01, -1.445e-01, 1.018e-01, -4.598e-01, -3.443e-01, -3.405e-01, 6.420e-02, 2.959e-01, 6.227e-02, -1.393e-01, 1.257e-01, -3.895e-01, -1.088e-03, 1.310e-01) * s0_2_1;
	r2 += M4(-7.441e-01, 6.947e-01, -1.458e-02, -8.321e-02, 3.258e-02, 4.920e-01, 3.064e-01, -3.452e-01, 2.648e-01, -5.293e-01, 4.603e-02, -2.054e-01, 2.475e-01, -1.812e-01, -2.364e-01, -2.339e-02) * s0_2_1;
	r0 += M4(4.793e-02, 3.786e-02, 1.818e-01, 2.075e-01, -3.115e-01, 1.403e-01, -2.749e-01, 2.347e-01, 1.372e-01, -9.252e-02, 7.684e-02, -1.626e-01, 2.451e-01, -1.045e-01, 4.055e-03, -2.157e-01) * s0_2_2;
	r1 += M4(-3.855e-01, -1.418e-01, 1.004e-01, -3.760e-01, -2.207e-01, 1.369e-01, 4.554e-02, -1.614e-01, 1.892e-01, 1.114e-01, -7.252e-02, 1.751e-02, -1.659e-01, 2.041e-01, -1.450e-01, 2.864e-02) * s0_2_2;
	r2 += M4(-5.737e-01, -9.376e-02, -5.892e-02, -1.304e-01, 2.250e-01, 7.472e-01, 6.970e-02, 2.047e-01, -1.462e-01, -1.000e+00, 2.470e-02, 1.901e-02, -2.103e-01, 7.556e-02, -7.524e-02, -1.973e-01) * s0_2_2;
	// Plane 1 contributions.
	r0 += M4(1.359e-01, -4.459e-02, -5.257e-02, 7.369e-02, 7.323e-02, -6.626e-02, -7.479e-02, -1.775e-03, 3.072e-02, 5.737e-02, 8.814e-02, -1.182e-01, -2.075e-01, -1.450e-01, 4.944e-02, 5.554e-02) * s1_0_0;
	r1 += M4(-2.536e-01, -1.372e-01, 7.600e-02, 2.043e-01, -2.341e-01, 1.411e-02, 2.286e-01, -4.392e-01, 1.510e-01, -2.433e-02, -8.661e-02, 2.845e-01, -8.094e-03, 1.183e-01, -9.949e-03, -3.717e-03) * s1_0_0;
	r2 += M4(-7.127e-01, 2.549e-01, -4.443e-01, -4.277e-02, -1.676e-01, 2.024e-01, -4.373e-04, -2.417e-01, -1.661e-01, 1.597e-01, -1.542e-01, -1.032e-01, 5.629e-02, 1.006e-01, 4.506e-01, 8.448e-02) * s1_0_0;
	r0 += M4(-8.204e-02, 1.000e+00, -8.861e-02, 1.647e-01, -1.956e-02, 6.799e-03, -1.697e-02, -1.803e-01, -9.913e-02, 4.292e-01, -2.148e-02, -7.319e-02, -2.028e-01, 1.458e-01, 2.767e-01, 1.996e-01) * s1_0_1;
	r1 += M4(-1.192e-02, -1.406e-01, -3.205e-01, -3.036e-01, -5.371e-01, -1.778e-02, 2.521e-01, -5.873e-02, 4.182e-02, -8.250e-02, -2.817e-02, 4.844e-01, 3.959e-01, -2.262e-02, -3.234e-01, 1.495e-01) * s1_0_1;
	r2 += M4(5.646e-01, 1.000e+00, -3.166e-01, 3.321e-01, 1.206e-02, -4.550e-01, 1.743e-01, -3.170e-01, 1.792e-01, -8.130e-02, -5.679e-02, 1.018e-01, -5.724e-01, -1.394e-01, 2.945e-01, -1.114e-01) * s1_0_1;
	r0 += M4(1.401e-01, -1.030e-01, -1.216e-01, 7.248e-02, -1.352e-02, -7.123e-02, 1.251e-02, -1.812e-02, 4.904e-02, -9.447e-02, -7.400e-03, -3.289e-02, 3.269e-02, -1.445e-02, 2.669e-02, 3.298e-02) * s1_0_2;
	r1 += M4(1.746e-01, -1.324e-01, -1.652e-01, 2.758e-01, 3.622e-01, 8.935e-02, -3.540e-03, 1.740e-01, -1.426e-01, -9.201e-02, 2.374e-01, 6.462e-02, 2.418e-01, -2.905e-02, 2.229e-01, -2.432e-02) * s1_0_2;
	r2 += M4(2.248e-01, 7.340e-01, -1.621e-01, -2.701e-02, 7.197e-02, -3.514e-01, 1.984e-01, -9.678e-02, 1.168e-01, -2.756e-01, 7.311e-03, 5.431e-02, -1.065e-01, 6.377e-02, 3.100e-02, 4.759e-02) * s1_0_2;
	r0 += M4(1.333e-01, 1.294e-01, 4.144e-01, 5.149e-01, -2.026e-01, -4.543e-02, 1.240e-01, -1.046e-01, 1.752e-01, -3.015e-02, 1.850e-01, 3.084e-01, -8.668e-02, 1.227e-01, 8.969e-02, 8.690e-02) * s1_1_0;
	r1 += M4(-9.601e-01, 3.472e-01, -8.946e-01, 7.542e-02, 1.238e-01, 6.089e-02, -1.000e+00, 5.705e-04, 2.678e-01, 9.845e-02, -1.000e+00, 3.232e-01, 5.928e-01, -6.756e-02, -1.404e-01, -5.517e-01) * s1_1_0;
	r2 += M4(-6.468e-01, 5.736e-01, -6.271e-01, -9.417e-02, 3.740e-01, -8.128e-02, 7.442e-02, 6.980e-01, 2.170e-02, -1.061e-01, -2.694e-02, -1.979e-01, -3.344e-01, -2.436e-02, 3.374e-01, 1.000e+00) * s1_1_0;
	r0 += M4(4.914e-01, -6.472e-01, -1.235e-01, -6.230e-01, 3.855e-01, 2.074e-01, -5.345e-02, 2.518e-01, 7.740e-02, -1.000e+00, -2.122e-01, -4.757e-01, 2.272e-02, -1.358e-01, 2.428e-01, -1.605e-01) * s1_1_1;
	r1 += M4(-9.590e-01, 7.667e-01, 9.994e-01, 2.788e-01, -2.569e-01, -5.341e-01, -9.434e-01, -6.977e-01, 9.062e-02, 4.122e-01, 7.768e-01, 9.889e-01, -8.780e-01, -3.831e-02, -5.108e-01, -2.882e-01) * s1_1_1;
	r2 += M4(4.033e-01, 8.495e-01, -9.326e-01, -3.376e-02, -4.853e-01, -8.252e-02, 4.737e-01, -8.205e-02, -3.217e-02, 4.194e-01, -7.859e-02, -6.080e-02, -1.803e-01, 2.924e-01, 1.000e+00, -2.765e-01) * s1_1_1;
	r0 += M4(-1.877e-02, 2.411e-01, -2.594e-01, 5.071e-02, 8.944e-02, 1.318e-01, 6.028e-03, -5.261e-02, -3.153e-02, -4.144e-02, 9.866e-02, 1.925e-01, -1.194e-01, -4.710e-03, 1.255e-01, 1.067e-01) * s1_1_2;
	r1 += M4(9.934e-01, -2.306e-01, 1.400e-01, 5.762e-02, 1.256e-01, 2.509e-02, 3.964e-02, 4.783e-01, 4.390e-02, -1.461e-01, -1.911e-02, 5.188e-01, -4.078e-02, -2.307e-02, -1.143e-01, -2.193e-01) * s1_1_2;
	r2 += M4(1.450e-01, -1.513e-01, 6.501e-02, 9.144e-02, 1.649e-02, 1.549e-01, 1.141e-02, 7.027e-02, 5.081e-01, -2.721e-01, 7.165e-02, 1.212e-01, -2.434e-01, 2.153e-01, -6.052e-02, -1.283e-03) * s1_1_2;
	r0 += M4(-7.055e-02, -2.665e-01, 1.869e-01, -2.749e-01, -1.422e-01, -7.961e-02, 1.353e-01, -1.588e-01, 6.427e-02, 5.268e-02, 2.989e-02, 1.355e-01, -4.423e-02, 3.178e-02, -1.367e-01, -2.451e-02) * s1_2_0;
	r1 += M4(-8.655e-02, 5.910e-01, -1.000e+00, 5.098e-01, -1.233e-01, 1.403e-01, -2.751e-02, 5.010e-02, -4.458e-02, 1.261e-02, -4.363e-02, 4.204e-01, 3.251e-01, -4.521e-02, 2.730e-01, -3.055e-01) * s1_2_0;
	r2 += M4(5.982e-01, 5.462e-01, -2.268e-01, -8.691e-01, -9.034e-02, -3.063e-01, 1.565e-02, 1.311e-01, 3.590e-01, -3.112e-03, 1.143e-01, 1.581e-01, -3.117e-01, 1.671e-01, -7.578e-02, 1.996e-01) * s1_2_0;
	r0 += M4(-7.123e-01, -6.141e-02, 3.291e-01, -1.278e-01, -2.703e-01, 4.497e-02, -4.463e-03, -7.474e-02, -9.160e-02, -5.929e-02, 2.726e-01, 1.342e-01, -8.912e-02, 1.673e-01, -1.348e-01, 2.867e-01) * s1_2_1;
	r1 += M4(9.062e-02, 1.000e+00, 5.189e-01, 3.234e-01, -2.000e-01, -6.907e-02, -5.659e-02, 6.887e-02, -1.811e-01, -1.697e-01, 4.908e-02, 6.687e-01, -2.437e-02, -1.579e-01, -4.518e-02, -2.763e-01) * s1_2_1;
	r2 += M4(4.869e-01, -1.000e+00, 4.604e-02, 4.981e-02, -8.923e-02, -1.000e+00, -7.005e-02, 1.510e-02, 4.411e-01, -1.992e-01, -3.170e-01, -9.621e-02, 1.010e-01, 2.909e-03, -3.106e-01, 2.127e-02) * s1_2_1;
	r0 += M4(1.185e-01, -8.189e-02, -2.902e-01, -4.743e-02, -1.785e-01, 4.843e-02, 3.899e-03, 3.585e-02, -3.448e-01, -1.387e-02, 5.810e-02, 4.662e-02, 1.881e-02, 6.077e-02, 7.279e-02, 2.917e-02) * s1_2_2;
	r1 += M4(1.000e+00, -9.894e-02, 1.376e-01, 2.006e-01, 1.848e-01, 2.477e-02, 1.870e-02, 2.038e-02, 1.541e-02, 2.683e-02, 7.829e-03, 5.353e-01, 1.101e-02, 1.179e-01, 6.801e-02, -1.766e-01) * s1_2_2;
	r2 += M4(-1.254e-01, -1.689e-01, 2.379e-01, 1.448e-02, -3.979e-02, 3.441e-01, 5.645e-02, 2.744e-03, 9.150e-01, -1.000e+00, 2.095e-01, 1.593e-01, -5.873e-02, -3.921e-01, -1.267e-01, 4.461e-02) * s1_2_2;
	// Plane 2: the s0_* variables are reused for the third plane now that the
	// plane 0 contributions have been accumulated.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(-3.571e-02, -5.145e-02, 3.382e-02, -2.085e-01, 2.158e-02, -2.361e-02, -8.915e-03, -1.785e-02, 9.497e-02, 9.068e-02, -1.152e-01, 1.549e-01, -2.920e-02, -2.579e-02, 2.273e-01, -1.822e-01) * s0_0_0;
	r1 += M4(1.243e-01, -9.179e-03, 1.760e-01, 2.757e-01, -2.196e-02, 1.173e-01, -1.474e-02, -3.428e-01, 1.806e-01, 4.894e-03, -2.326e-01, 1.867e-01, -1.032e-02, -1.230e-02, 2.761e-02, -1.967e-01) * s0_0_0;
	r2 += M4(1.100e-01, -2.573e-01, -3.942e-01, 3.186e-01, 7.536e-01, -2.593e-01, 1.605e-01, -9.201e-02, 5.801e-01, 6.232e-02, 2.057e-01, 1.225e-01, -8.340e-01, -3.269e-02, -2.421e-01, -3.162e-01) * s0_0_0;
	r0 += M4(1.598e-01, -4.825e-01, -2.479e-01, -3.308e-02, 2.708e-01, -1.000e+00, -1.226e-01, -1.440e-01, 1.022e-02, -4.069e-01, 1.605e-02, 3.292e-02, 2.620e-01, 3.727e-01, 1.630e-01, 1.153e-01) * s0_0_1;
	r1 += M4(-2.456e-01, 1.411e-02, -4.795e-01, 1.107e-01, -5.212e-02, 1.675e-01, 3.712e-01, 4.121e-02, -2.789e-01, 1.297e-01, 1.015e-02, 4.667e-01, 8.533e-01, -1.129e-01, 3.517e-02, -2.207e-02) * s0_0_1;
	r2 += M4(3.010e-01, -1.541e-01, -6.384e-02, -2.562e-01, -2.157e-01, -6.240e-01, 1.039e-01, -2.212e-01, 2.627e-01, 1.614e-01, 1.104e-01, -1.429e-01, -8.613e-01, -9.691e-02, -3.230e-01, -2.295e-01) * s0_0_1;
	r0 += M4(-5.982e-02, 5.324e-03, 4.798e-02, -1.100e-02, -8.752e-02, 1.718e-01, 4.802e-02, -1.236e-01, -7.786e-02, 1.859e-01, 2.147e-02, 3.506e-02, 1.726e-01, -2.530e-01, -9.239e-02, -2.007e-01) * s0_0_2;
	r1 += M4(4.484e-02, 9.434e-02, 1.053e-01, -2.245e-01, -4.189e-01, 1.853e-01, 1.019e-01, -4.221e-01, 8.603e-02, 8.157e-02, -3.022e-01, 2.485e-01, -5.749e-01, 1.568e-01, 3.557e-01, -2.722e-01) * s0_0_2;
	r2 += M4(-2.455e-01, 3.123e-01, -1.546e-01, 5.056e-02, -7.929e-02, -4.780e-01, -1.498e-02, 4.838e-02, 2.963e-01, -3.587e-02, -9.657e-02, -7.337e-02, -2.822e-01, 2.167e-01, 1.262e-01, 1.910e-01) * s0_0_2;
	r0 += M4(2.355e-01, -2.154e-01, -5.588e-03, -3.742e-01, 2.055e-01, -4.779e-01, -3.623e-01, -5.083e-01, -8.163e-02, 1.715e-02, -2.769e-01, -3.841e-01, -7.768e-03, -2.376e-02, 1.836e-01, 3.850e-01) * s0_1_0;
	r1 += M4(7.153e-02, 1.076e-01, -5.016e-01, -4.341e-02, 2.006e-01, -1.891e-01, 1.392e-01, -2.179e-01, 3.561e-01, 3.891e-02, 6.579e-01, 7.481e-01, -4.741e-01, 7.350e-03, -3.303e-01, -2.588e-01) * s0_1_0;
	r2 += M4(-7.819e-02, 7.095e-01, -7.601e-02, -5.430e-02, 3.496e-01, -5.103e-01, 2.646e-01, -6.004e-01, 5.846e-01, 2.118e-01, -9.214e-02, 3.901e-01, -2.407e-01, 9.370e-03, -2.118e-02, -1.000e+00) * s0_1_0;
	r0 += M4(-3.967e-01, 6.126e-01, 3.175e-01, 2.194e-01, -3.271e-01, -1.000e+00, -1.795e-01, 5.254e-01, -9.123e-02, 4.385e-01, 6.811e-02, 6.130e-01, 7.592e-02, -1.096e-01, 2.985e-01, 4.039e-01) * s0_1_1;
	r1 += M4(1.867e-01, -1.479e-02, 4.851e-01, -4.032e-02, 1.000e+00, -3.948e-01, -3.682e-01, -4.588e-01, 2.776e-01, -3.467e-01, -4.816e-01, 1.187e-02, -1.000e+00, 4.699e-02, -9.825e-01, -1.005e-01) * s0_1_1;
	r2 += M4(-7.305e-01, -5.250e-01, 1.487e-01, 4.137e-01, -2.868e-02, -6.794e-01, 7.654e-02, 5.898e-02, -1.791e-01, -1.827e-01, -8.435e-01, -5.044e-02, 7.389e-01, 3.671e-01, 1.995e-01, -4.294e-01) * s0_1_1;
	r0 += M4(1.423e-01, -3.287e-02, 6.992e-03, 9.700e-02, 1.536e-01, -3.457e-01, 3.699e-02, -2.656e-02, 2.150e-01, -3.431e-01, 4.880e-03, -3.294e-01, -5.982e-02, 3.838e-01, -1.581e-01, 5.371e-01) * s0_1_2;
	r1 += M4(-1.251e-01, -1.028e-01, -1.205e-01, -7.683e-02, -2.959e-01, 1.317e-01, -1.008e-01, -1.006e-01, -2.271e-01, 4.262e-01, -7.186e-03, 3.037e-01, 1.709e-01, -4.212e-01, 4.320e-01, 5.476e-02) * s0_1_2;
	r2 += M4(3.189e-01, 1.327e-01, 2.003e-01, 5.836e-02, -1.254e-01, -7.990e-02, -1.404e-01, -5.076e-02, 5.458e-02, 3.512e-01, 1.837e-01, -1.481e-01, 6.180e-01, -1.000e+00, -3.885e-02, 2.106e-01) * s0_1_2;
	r0 += M4(-8.283e-02, -1.443e-01, 1.911e-02, 1.165e-01, 1.479e-01, 1.343e-01, 3.092e-02, 4.507e-01, -8.916e-02, -3.781e-02, -1.574e-01, -1.646e-01, -2.106e-02, 4.338e-02, 1.826e-03, -7.202e-02) * s0_2_0;
	r1 += M4(-2.464e-01, -2.108e-01, 1.321e-02, -6.482e-01, -6.249e-02, -2.859e-01, 4.322e-01, -6.375e-01, -1.723e-01, -6.720e-02, -1.356e-01, 1.935e-01, 1.975e-01, 6.661e-02, -2.962e-02, -8.667e-02) * s0_2_0;
	r2 += M4(-3.620e-01, -3.466e-01, 1.332e-01, -4.922e-03, -6.109e-01, -7.650e-01, -3.871e-01, 2.483e-01, 2.686e-01, -2.862e-01, 6.819e-02, 1.959e-01, -5.296e-02, 2.684e-01, 9.490e-02, -3.034e-01) * s0_2_0;
	r0 += M4(9.985e-02, 1.588e-01, -1.480e-01, 8.547e-02, 5.284e-01, -1.836e-01, 3.645e-01, -4.502e-01, 1.240e-01, 1.432e-03, -3.420e-01, -4.752e-03, -1.019e-01, 2.332e-01, 1.561e-01, 2.359e-02) * s0_2_1;
	r1 += M4(-1.491e-02, 2.789e-01, 4.207e-03, 2.173e-01, -4.611e-01, -3.182e-01, -4.701e-01, -6.798e-01, 8.896e-02, 4.910e-01, -6.111e-01, 2.969e-01, -1.632e-02, -3.045e-02, 1.735e-01, -1.460e-01) * s0_2_1;
	r2 += M4(5.411e-01, 6.771e-01, 5.960e-02, -1.098e-01, -1.000e+00, 1.000e+00, -3.470e-01, -6.647e-02, 8.325e-02, -6.503e-02, 3.272e-01, 1.815e-01, 4.219e-01, 2.079e-02, 3.382e-02, -1.443e-01) * s0_2_1;
	r0 += M4(-1.061e-01, 4.430e-02, -1.307e-01, 4.695e-02, -6.149e-02, 3.796e-02, 1.816e-01, -5.191e-03, 6.213e-02, -1.700e-02, 3.267e-02, 1.008e-01, 1.864e-01, -4.204e-03, -5.348e-02, -2.525e-01) * s0_2_2;
	r1 += M4(3.115e-01, -3.278e-02, 1.007e-01, 4.209e-01, -3.795e-01, -1.510e-01, -7.419e-02, -1.506e-01, -8.515e-02, 2.552e-01, -4.127e-02, 4.693e-02, 4.975e-01, -1.899e-01, 5.837e-02, 1.557e-01) * s0_2_2;
	r2 += M4(5.730e-02, -1.518e-01, 9.992e-02, 2.914e-02, -2.063e-01, -1.157e-01, -1.539e-01, 2.503e-02, -2.377e-01, -1.378e-01, 4.444e-02, -3.858e-02, -4.934e-02, -2.564e-01, 1.224e-01, 7.319e-02) * s0_2_2;
	// Per accumulator: add its constant offset, clamp to [0, 1], and store it
	// in output texel opos + (k, 0) for k = 0, 1, 2.
	r0 += V4(-2.899e-02, 4.178e-02, 4.237e-02, 2.289e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-5.522e-03, 1.665e-02, -1.502e-02, 3.328e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-2.398e-02, 1.008e-01, 2.692e-04, -6.093e-03);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_3x12_DS] -conv2
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv1
//!BIND LUMA
//!SAVE conv2
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Request 16-bit float arithmetic. When the extension macro is defined the
// aliases below map to half-precision types; otherwise they fall back to the
// standard 32-bit float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): fetch one 4-component texel of the `conv1` texture for the
// pixel at pos + (x, y), scaled by conv1_mul and converted to V4. The pixel
// coordinate is clamped to [0, sz] first, then its x is multiplied by 3 and
// offset by 0, 1 or 2, so lK selects the K-th of three horizontally adjacent
// texels belonging to that pixel. `pos` and `sz` are taken from the scope
// where the macro is expanded (hook()).
#define l0(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tile: three planes (one per l0/l1/l2) of 10x10 V4 entries.
// hook() fills it with the 8x8 pixels handled by the workgroup plus a
// one-pixel border on every side.
shared V4 G[3][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(7.012e-02, 1.126e-01, 2.176e-01, 5.516e-01, 3.380e-02, 1.825e-01, 3.368e-02, 8.014e-02, -8.566e-02, -2.179e-02, -2.463e-01, -3.363e-01, 1.741e-02, 1.076e-01, -6.860e-02, -4.092e-02) * s0_0_0;
	r1 += M4(5.772e-02, -2.143e-02, -5.521e-02, 3.171e-02, 1.332e-01, -1.174e-01, -1.324e-02, -1.134e-01, 7.004e-02, -7.838e-02, 1.817e-01, 4.204e-01, -1.744e-01, 1.102e-01, 1.148e-02, -1.223e-01) * s0_0_0;
	r2 += M4(1.096e-02, 2.724e-02, 7.937e-02, -8.431e-02, -6.706e-02, 5.395e-02, -2.948e-02, -5.901e-02, 1.489e-01, 1.173e-01, 1.057e-01, -1.705e-01, -3.718e-02, 1.542e-01, -8.988e-02, 5.594e-02) * s0_0_0;
	r0 += M4(3.663e-02, 1.211e-01, -2.616e-01, -6.574e-01, 2.145e-02, -6.642e-02, -1.621e-02, 3.037e-01, -1.646e-01, 1.416e-01, -3.372e-01, -4.640e-01, 1.109e-01, 9.365e-02, -2.442e-01, 4.111e-03) * s0_0_1;
	r1 += M4(-2.222e-01, 1.968e-01, 2.035e-01, 1.722e-02, -1.847e-01, 5.187e-02, 2.570e-02, -2.891e-02, 6.781e-01, -1.618e-01, -2.870e-02, -4.351e-01, -7.004e-02, 2.978e-03, 1.303e-03, -1.104e-01) * s0_0_1;
	r2 += M4(1.341e-01, 7.256e-03, -1.227e-01, -2.647e-01, 1.292e-02, 2.116e-02, 5.682e-02, 1.266e-01, 7.513e-02, 5.884e-02, 4.223e-01, -1.478e-01, 2.754e-02, 7.475e-02, -4.575e-02, 7.509e-02) * s0_0_1;
	r0 += M4(8.217e-02, 6.430e-03, 4.210e-02, 2.185e-01, -8.888e-02, -8.183e-02, -3.268e-02, -5.203e-02, 4.375e-02, -4.311e-02, -9.360e-03, 9.065e-02, 1.457e-02, -1.504e-01, 1.324e-01, 1.771e-01) * s0_0_2;
	r1 += M4(2.721e-01, 9.302e-03, -2.389e-02, -2.212e-01, -1.995e-01, -1.236e-01, -2.853e-02, -2.116e-01, 1.322e-01, -5.408e-02, 1.175e-01, -3.235e-01, -2.549e-01, -1.380e-01, -1.470e-01, -6.185e-01) * s0_0_2;
	r2 += M4(1.159e-02, -8.970e-02, 2.720e-02, -1.076e-01, 4.703e-03, 5.889e-02, 2.110e-02, 6.515e-02, -3.695e-02, -2.276e-01, 8.215e-02, 1.876e-01, 2.500e-03, 7.471e-02, -1.178e-01, -5.629e-02) * s0_0_2;
	r0 += M4(4.748e-02, 1.506e-01, 5.519e-01, 5.633e-01, 5.045e-02, 3.796e-01, -3.868e-01, -7.363e-01, 7.776e-02, 2.705e-01, 5.759e-01, 1.251e-01, -1.187e-02, -2.186e-01, -3.837e-01, -2.538e-01) * s0_1_0;
	r1 += M4(1.209e-01, -4.218e-01, -2.432e-01, -3.834e-01, 3.138e-01, -6.451e-02, -4.189e-02, -3.725e-01, -9.506e-02, 3.150e-01, 9.355e-02, -4.262e-05, 1.413e-01, 2.731e-02, 1.811e-02, -9.261e-03) * s0_1_0;
	r2 += M4(-1.466e-01, -1.843e-01, -4.016e-02, 8.574e-02, -8.868e-02, -5.244e-02, -1.450e-01, 3.126e-03, 2.509e-01, -1.019e-01, 1.649e-02, 7.696e-02, -1.174e-01, -1.519e-02, 3.161e-02, -3.747e-03) * s0_1_0;
	r0 += M4(7.723e-02, 3.345e-01, -4.757e-01, 2.026e-01, 1.597e-01, 3.389e-01, -3.513e-02, -1.000e+00, -5.097e-01, -4.321e-01, 3.063e-01, -5.600e-02, 1.831e-01, 9.177e-02, -1.000e+00, -8.143e-01) * s0_1_1;
	r1 += M4(-8.395e-01, 1.026e-01, 8.283e-01, -5.161e-01, 2.747e-01, 2.205e-01, -4.197e-02, -1.000e+00, -8.262e-01, -2.018e-01, -8.306e-02, 4.209e-01, 6.502e-01, 1.556e-01, -7.057e-01, 3.231e-01) * s0_1_1;
	r2 += M4(-1.182e-01, -1.572e-01, 2.749e-01, -5.262e-01, 7.163e-02, -9.652e-02, -5.188e-01, 7.174e-02, -1.870e-01, -3.896e-01, -4.890e-02, -2.195e-01, 9.107e-02, -1.841e-02, -1.051e-01, 4.236e-02) * s0_1_1;
	r0 += M4(-1.867e-01, -4.665e-02, 1.104e-01, 1.508e-01, 1.746e-01, 6.206e-02, -4.210e-02, -1.897e-01, 2.114e-01, 5.544e-02, 9.352e-02, 2.007e-01, 5.319e-01, 3.497e-02, 1.589e-01, 2.284e-02) * s0_1_2;
	r1 += M4(-3.068e-01, -1.580e-01, -2.728e-01, -3.152e-01, 4.793e-02, 8.441e-03, -8.422e-03, -1.386e-01, 2.157e-01, -2.301e-02, -1.780e-01, 1.695e-01, 5.214e-01, 3.794e-02, -2.699e-01, -1.348e-01) * s0_1_2;
	r2 += M4(8.688e-03, 6.016e-02, 7.052e-02, 1.065e-02, -1.566e-02, 1.501e-02, 1.203e-02, 2.659e-02, -5.580e-03, -8.962e-02, 3.428e-02, 1.101e-01, -1.369e-01, -8.231e-02, -4.623e-02, 2.046e-01) * s0_1_2;
	r0 += M4(-9.066e-03, 1.529e-01, -2.036e-01, -2.317e-01, 8.316e-03, 2.359e-01, -3.428e-01, -1.862e-01, 2.202e-02, 6.994e-02, 8.388e-02, 4.960e-02, -6.514e-02, -1.431e-01, -4.204e-02, -1.751e-01) * s0_2_0;
	r1 += M4(-1.622e-01, -1.978e-01, 3.277e-02, 3.504e-02, -1.403e-01, 1.940e-01, -5.833e-02, 1.683e-01, 5.208e-02, 1.877e-01, 9.106e-02, 2.896e-02, 5.111e-02, -2.910e-02, -6.216e-02, 1.968e-02) * s0_2_0;
	r2 += M4(-1.620e-01, -1.086e-01, -3.901e-02, 4.318e-02, -1.739e-03, -3.500e-01, -4.122e-02, -8.018e-02, 1.136e-01, 9.504e-02, 4.164e-02, -8.618e-02, 1.335e-02, -1.242e-01, -5.224e-02, 1.881e-02) * s0_2_0;
	r0 += M4(6.272e-02, -8.957e-02, -5.366e-01, -2.835e-01, -2.607e-01, 1.639e-01, 1.729e-01, -5.102e-02, -6.749e-02, -3.459e-02, -9.283e-02, -2.498e-02, -1.519e-02, -8.638e-02, 8.005e-02, 9.848e-02) * s0_2_1;
	r1 += M4(2.584e-01, -1.442e-01, -8.149e-02, -2.978e-01, -5.407e-01, -1.401e-01, -2.564e-01, -1.526e-01, -3.141e-01, -7.804e-02, -1.477e-02, 1.504e-02, 6.132e-04, 9.841e-02, -8.912e-02, 1.694e-01) * s0_2_1;
	r2 += M4(7.590e-02, -2.102e-01, -9.911e-02, 1.883e-01, -4.853e-02, 2.227e-01, -8.416e-02, 9.741e-02, -2.269e-02, 2.753e-01, -1.605e-02, 5.650e-02, -6.526e-03, -3.701e-01, -4.201e-02, -2.907e-03) * s0_2_1;
	r0 += M4(2.066e-01, -1.092e-01, -3.911e-03, 4.380e-03, 1.061e-01, 5.173e-03, -4.636e-02, -1.161e-01, 6.316e-02, 4.301e-02, 6.044e-02, 4.180e-02, 2.200e-02, 1.863e-01, 1.879e-01, 2.752e-01) * s0_2_2;
	r1 += M4(3.574e-02, -6.979e-02, -1.401e-01, -8.138e-02, -5.161e-03, -1.061e-01, 3.295e-02, -1.122e-01, -1.066e-01, 7.348e-02, 1.592e-02, 5.351e-02, 2.481e-01, 1.741e-01, 6.372e-02, 5.157e-02) * s0_2_2;
	r2 += M4(-1.534e-02, 1.580e-02, -2.445e-02, 7.971e-02, -9.500e-02, -6.045e-02, 4.402e-02, -6.704e-02, 6.672e-02, 1.647e-01, 2.197e-02, 1.003e-02, 7.320e-03, -4.431e-01, 3.280e-02, -7.753e-02) * s0_2_2;
	r0 += M4(-5.532e-02, -3.725e-02, 6.094e-02, 1.802e-01, -1.429e-02, 3.132e-01, -1.060e-01, -5.441e-01, 2.969e-02, 1.259e-01, -3.546e-02, 1.311e-01, 5.215e-03, -3.986e-02, -4.301e-02, -2.272e-01) * s1_0_0;
	r1 += M4(-5.190e-02, 2.538e-02, 2.050e-02, 1.483e-01, -5.717e-02, 4.783e-01, 1.062e-01, 1.185e-01, 2.455e-01, 1.879e-01, 1.175e-02, 1.027e-01, 7.426e-02, -2.646e-02, -2.129e-02, -6.666e-02) * s1_0_0;
	r2 += M4(-3.624e-02, 3.071e-02, 4.916e-02, 1.234e-02, -7.201e-02, 2.897e-01, -6.365e-02, 1.561e-02, 8.210e-02, 1.093e-01, -1.222e-01, -1.180e-02, 1.790e-02, -4.402e-02, -9.092e-02, 1.067e-01) * s1_0_0;
	r0 += M4(2.249e-03, -1.029e-01, 8.400e-02, 1.284e-01, 1.199e-01, -4.248e-01, 1.490e-01, 2.949e-01, 4.141e-02, -1.675e-01, 1.230e-01, 5.846e-02, -7.110e-02, -2.327e-02, 3.967e-02, 7.705e-02) * s1_0_1;
	r1 += M4(1.665e-02, 4.506e-02, 7.885e-02, -7.336e-02, -3.227e-01, -6.486e-01, -3.386e-01, -1.495e-01, 2.227e-01, -1.265e-01, 3.040e-02, -1.443e-01, -2.546e-01, 7.250e-02, -1.067e-01, 1.608e-01) * s1_0_1;
	r2 += M4(4.083e-02, 8.139e-02, 1.253e-01, -8.904e-02, -1.223e-01, 6.203e-02, -6.881e-03, 7.347e-01, 3.409e-02, -8.591e-02, -8.442e-03, -9.643e-02, -3.424e-02, 4.505e-02, -7.054e-02, -2.551e-02) * s1_0_1;
	r0 += M4(7.562e-02, -8.270e-02, 3.634e-02, 2.783e-01, -2.826e-01, -1.343e-01, -2.751e-02, 8.313e-02, 6.702e-02, 5.261e-02, 1.134e-01, 3.267e-01, 2.614e-03, -7.538e-02, -1.020e-01, -3.172e-01) * s1_0_2;
	r1 += M4(-1.281e-01, 6.974e-02, 1.141e-02, 8.253e-02, -8.097e-02, -7.174e-02, -6.998e-02, -9.479e-02, -1.545e-02, 4.912e-02, -8.223e-02, 1.264e-01, -2.165e-01, 1.294e-01, -1.064e-02, -1.141e-01) * s1_0_2;
	r2 += M4(3.516e-02, 1.228e-01, 1.074e-01, 1.159e-01, -2.925e-02, -3.540e-02, 1.264e-02, 9.473e-02, 5.417e-02, 4.846e-02, -2.627e-02, -1.989e-01, -8.989e-03, -1.135e-02, -2.785e-02, -1.548e-01) * s1_0_2;
	r0 += M4(4.432e-02, 2.793e-02, -3.498e-03, 6.733e-03, -6.870e-02, -2.717e-01, -3.685e-01, -2.095e-01, -9.643e-02, -1.136e-01, -1.841e-01, 1.372e-02, -1.611e-02, -1.308e-01, -1.530e-01, -6.643e-02) * s1_1_0;
	r1 += M4(1.665e-01, 3.208e-01, 2.236e-01, 4.027e-01, 2.398e-01, 1.820e-02, 8.618e-02, 2.216e-01, 1.838e-01, 2.638e-01, -8.657e-02, -2.409e-02, 1.658e-01, -2.139e-01, -1.558e-01, -2.939e-01) * s1_1_0;
	r2 += M4(1.153e-01, 2.125e-01, 8.112e-02, -9.354e-02, -1.909e-01, 3.230e-01, 9.693e-02, 3.238e-02, -2.654e-02, 2.884e-02, 2.276e-04, 6.715e-02, -1.802e-01, -2.105e-01, 3.196e-02, 5.774e-02) * s1_1_0;
	r0 += M4(8.314e-03, 1.346e-01, 2.737e-01, 2.741e-01, 4.533e-01, 4.021e-01, -8.811e-02, 7.695e-02, 2.497e-02, 2.228e-01, 4.221e-01, -3.682e-01, -2.839e-03, -5.713e-01, 3.369e-01, 8.351e-02) * s1_1_1;
	r1 += M4(1.014e-01, 1.039e-01, -1.296e-02, 5.646e-01, 8.473e-01, 5.148e-01, 1.812e-01, -1.402e-01, -2.051e-01, -4.952e-01, -1.082e-01, -3.100e-01, -3.601e-01, -4.336e-01, 1.188e-01, 2.028e-01) * s1_1_1;
	r2 += M4(2.483e-01, 3.580e-01, 1.762e-01, 1.697e-01, 3.744e-01, -3.211e-01, 3.271e-01, -3.263e-01, 2.998e-01, 4.856e-01, -1.946e-01, -1.416e-03, 6.393e-02, -3.994e-01, -1.200e-01, -1.905e-01) * s1_1_1;
	r0 += M4(1.748e-02, 9.901e-02, 4.853e-02, 2.652e-02, -1.535e-01, 1.459e-01, -1.541e-02, -1.832e-01, -3.947e-01, -2.329e-02, 1.588e-02, 2.722e-01, -1.255e-01, -2.016e-02, -4.753e-02, 1.979e-01) * s1_1_2;
	r1 += M4(1.925e-01, -2.129e-03, -6.244e-02, 1.210e-01, -8.490e-03, 6.094e-02, -1.106e-01, -5.641e-02, 5.098e-03, -8.407e-02, -1.863e-03, 3.233e-01, 2.658e-01, 1.377e-01, 9.006e-02, 2.561e-01) * s1_1_2;
	r2 += M4(6.630e-02, 1.689e-01, 1.703e-02, -4.961e-03, -6.662e-02, 1.254e-01, -6.519e-02, -6.599e-02, 1.153e-02, -2.786e-02, -3.147e-02, -1.171e-01, -4.161e-02, -1.888e-01, -7.578e-02, 7.860e-02) * s1_1_2;
	r0 += M4(9.429e-02, -1.124e-01, 1.743e-01, -1.217e-01, -1.549e-02, -7.708e-02, 9.226e-02, -1.587e-01, -2.101e-02, -4.676e-02, -9.084e-02, 5.792e-02, -2.441e-03, -1.342e-01, 3.870e-02, 1.170e-01) * s1_2_0;
	r1 += M4(1.428e-01, 2.135e-01, 9.173e-02, 2.382e-01, -4.129e-02, 9.006e-02, -2.278e-02, 2.282e-01, -1.921e-02, 1.124e-01, -9.145e-03, -1.154e-02, -2.710e-02, -2.547e-01, 9.805e-02, -6.604e-02) * s1_2_0;
	r2 += M4(6.919e-02, 1.087e-01, 5.290e-02, -3.393e-02, 8.670e-03, -2.375e-01, -3.937e-02, -1.759e-03, 1.764e-02, 2.022e-01, -5.612e-03, -2.573e-02, 4.860e-02, -1.760e-01, 2.971e-02, -2.155e-02) * s1_2_0;
	r0 += M4(1.285e-02, -6.428e-02, 2.478e-01, -1.039e-02, 2.480e-02, 4.338e-02, 1.349e-01, -1.269e-01, 7.345e-02, 1.151e-01, 2.364e-01, -3.322e-02, 2.472e-02, 5.603e-02, 2.529e-01, 3.527e-01) * s1_2_1;
	r1 += M4(2.668e-01, 2.153e-01, -1.259e-02, 1.255e-01, 3.346e-02, 2.600e-02, -7.217e-02, 8.175e-02, 2.661e-01, 2.815e-01, -5.699e-03, 6.873e-03, -6.193e-01, 3.695e-02, -2.923e-01, -1.766e-01) * s1_2_1;
	r2 += M4(5.382e-02, 4.204e-02, 1.744e-01, -1.230e-01, 4.229e-02, -2.514e-01, 1.179e-02, 7.486e-02, 1.934e-01, -9.629e-02, -6.616e-02, -4.695e-02, -8.383e-02, 5.646e-01, -2.461e-02, -3.574e-03) * s1_2_1;
	r0 += M4(-8.855e-03, 2.187e-02, 1.556e-01, 3.943e-02, -9.058e-02, 2.833e-02, -1.323e-01, 2.595e-02, -3.490e-02, 7.051e-02, 1.140e-01, -2.621e-02, -7.350e-02, -1.298e-02, -3.985e-02, 9.358e-02) * s1_2_2;
	r1 += M4(2.963e-01, 6.569e-02, 3.220e-02, 2.242e-01, 9.568e-02, 1.179e-02, 2.695e-02, 1.189e-02, -5.574e-02, 1.304e-01, -5.723e-02, 6.665e-02, -3.843e-01, -1.069e-01, 1.285e-01, -1.597e-01) * s1_2_2;
	r2 += M4(3.654e-02, 6.866e-02, -1.144e-02, 7.698e-02, -5.627e-02, -3.094e-01, -6.904e-02, 4.363e-03, 7.684e-02, 4.482e-01, -1.927e-03, -8.766e-02, 2.890e-02, 1.592e-02, 9.603e-02, -2.310e-01) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(3.538e-02, -2.394e-01, -9.038e-02, 2.950e-02, -5.893e-02, -9.347e-02, 1.804e-01, 4.668e-01, -2.053e-02, -2.865e-02, 7.985e-02, 4.480e-01, 1.470e-02, -1.959e-02, 7.480e-02, -1.273e-01) * s0_0_0;
	r1 += M4(1.103e-01, -9.596e-02, 1.886e-01, 2.270e-01, -1.323e-01, -7.596e-02, -4.931e-02, 7.365e-02, 9.785e-03, 2.049e-01, -1.158e-01, -7.215e-02, -6.311e-02, -2.517e-02, -2.068e-02, 1.049e-01) * s0_0_0;
	r2 += M4(1.247e-01, -2.826e-02, 1.675e-01, -1.095e-01, 4.454e-02, 7.024e-02, 1.840e-02, -4.131e-02, -4.903e-02, 2.436e-02, -1.410e-01, 1.709e-01, -1.448e-02, -7.769e-03, -1.351e-02, -5.695e-02) * s0_0_0;
	r0 += M4(-7.499e-02, -8.021e-02, 2.320e-02, -9.601e-03, -3.547e-02, -7.128e-02, 3.034e-02, -6.960e-02, 6.574e-02, 1.242e-01, 9.605e-03, 7.365e-03, -9.212e-03, -2.852e-01, -1.367e-01, -1.258e-01) * s0_0_1;
	r1 += M4(4.463e-01, -4.957e-02, 2.493e-02, -9.186e-02, -1.587e-01, 1.359e-02, 1.707e-01, 1.156e-01, -4.388e-01, 1.561e-01, 5.075e-02, -2.460e-01, 1.281e-01, -6.826e-02, 1.408e-02, -7.358e-03) * s0_0_1;
	r2 += M4(6.623e-02, 5.930e-02, 7.158e-02, 9.790e-02, -1.271e-02, 1.046e-01, 1.480e-01, -1.480e-01, -3.189e-02, 1.007e-01, -4.077e-01, 1.791e-01, -5.052e-02, -1.362e-01, 6.998e-02, -9.506e-02) * s0_0_1;
	r0 += M4(3.000e-03, -6.335e-02, 1.661e-02, 4.773e-02, 1.487e-02, 2.440e-02, -1.545e-03, 8.560e-02, 1.939e-01, -6.308e-02, 7.670e-03, 6.134e-02, -1.562e-02, 1.686e-01, 3.916e-02, -1.206e-01) * s0_0_2;
	r1 += M4(8.700e-02, -1.303e-01, 2.544e-04, -2.922e-02, 3.736e-02, 3.698e-02, -3.222e-03, 2.172e-01, -2.376e-01, 1.256e-01, -2.153e-02, 1.723e-01, -1.243e-01, -7.441e-02, -3.680e-02, -2.361e-01) * s0_0_2;
	r2 += M4(4.728e-02, -7.301e-02, 1.411e-01, 1.187e-01, 5.429e-02, 3.344e-02, 5.475e-02, -6.537e-02, -8.104e-02, 4.164e-03, -2.910e-01, 3.977e-02, -3.627e-02, 8.495e-02, -3.714e-02, -3.135e-01) * s0_0_2;
	r0 += M4(1.043e-02, 7.594e-02, 8.667e-02, 1.373e-02, -7.656e-02, -9.765e-02, 2.709e-01, 6.455e-01, -4.632e-03, -1.229e-01, -2.579e-01, 1.643e-01, 4.270e-02, 1.012e-01, -6.961e-02, 5.769e-02) * s0_1_0;
	r1 += M4(-5.375e-01, 1.498e-01, 1.048e-01, 8.034e-02, -1.831e-01, 8.137e-02, 6.970e-02, 9.598e-02, 2.067e-01, -5.609e-01, 6.446e-02, -2.771e-01, 2.656e-03, 2.882e-02, -2.245e-02, 2.135e-02) * s0_1_0;
	r2 += M4(2.411e-01, 3.792e-01, -4.897e-02, 5.865e-02, 5.832e-02, 1.113e-01, 7.017e-02, -8.043e-02, -6.551e-02, -2.443e-01, -3.411e-02, 7.818e-02, -9.878e-03, -7.383e-02, -4.994e-02, 3.381e-02) * s0_1_0;
	r0 += M4(-4.310e-02, 2.638e-01, -2.318e-01, 3.154e-01, -3.902e-02, -1.046e-01, 5.675e-02, 2.496e-01, 5.868e-02, -5.064e-01, 1.427e-02, 4.885e-01, 1.129e-01, 2.377e-01, 3.440e-02, -5.232e-01) * s0_1_1;
	r1 += M4(1.025e-01, 2.766e-01, 3.384e-02, -5.025e-01, -1.409e-01, -5.886e-02, 1.241e-01, 6.623e-01, -7.795e-01, -4.733e-01, 2.668e-01, 3.089e-01, -1.340e-01, 6.488e-02, 8.621e-02, -1.191e-01) * s0_1_1;
	r2 += M4(-1.232e-02, 1.114e-01, 3.363e-01, 6.620e-02, -4.713e-02, 2.239e-01, 2.009e-02, -4.919e-02, -1.451e-01, -1.150e-01, -1.975e-01, -2.005e-02, 1.665e-01, 5.558e-03, -3.305e-02, -2.336e-02) * s0_1_1;
	r0 += M4(2.273e-01, -2.359e-01, 2.158e-01, -5.716e-02, -6.402e-03, -7.295e-02, -9.591e-02, 1.077e-01, -2.450e-01, 7.953e-02, -1.431e-01, 2.544e-01, -2.230e-01, -1.178e-02, -1.381e-01, -8.038e-01) * s0_1_2;
	r1 += M4(-6.595e-01, -1.259e-01, -3.901e-02, -9.943e-02, -7.838e-02, 4.643e-02, 5.761e-02, 6.255e-02, -5.456e-01, 5.704e-02, 2.443e-01, 8.534e-02, -3.095e-01, 1.052e-01, 1.406e-01, -1.000e+00) * s0_1_2;
	r2 += M4(6.496e-02, 1.055e-01, -1.165e-01, 5.095e-03, 1.007e-01, 3.079e-02, 3.034e-02, -2.375e-02, 3.263e-02, -1.422e-01, 6.925e-02, 5.357e-02, 2.587e-01, -3.741e-01, -1.336e-01, 1.863e-01) * s0_1_2;
	r0 += M4(-2.455e-02, -1.119e-02, -2.531e-01, -2.646e-01, 1.211e-02, -1.243e-02, 4.079e-02, 9.803e-02, 2.751e-02, -1.153e-01, 3.714e-02, 2.082e-01, -5.930e-03, -3.002e-02, -3.850e-02, -4.844e-02) * s0_2_0;
	r1 += M4(-1.381e-01, -2.369e-01, 2.758e-02, -1.098e-03, -5.202e-02, -1.938e-03, 6.005e-02, 7.970e-02, 1.812e-01, -2.592e-01, -1.909e-02, -9.423e-02, 1.530e-02, -1.649e-03, -1.427e-02, 2.368e-02) * s0_2_0;
	r2 += M4(-1.050e-02, -6.068e-02, 2.839e-02, 5.326e-02, 3.141e-02, 3.408e-01, 6.598e-02, 1.547e-02, -7.814e-02, -4.564e-02, 4.345e-02, 5.459e-02, -4.760e-02, 2.508e-02, 1.767e-02, -4.357e-02) * s0_2_0;
	r0 += M4(1.517e-01, -1.990e-01, -3.233e-01, -2.704e-01, -2.234e-02, 1.345e-02, -6.117e-02, 2.010e-02, 4.287e-02, -9.426e-04, 3.236e-02, 2.134e-01, -6.664e-03, -1.475e-01, -2.175e-01, -9.067e-02) * s0_2_1;
	r1 += M4(3.685e-01, -2.963e-02, 9.885e-02, 1.223e-01, -1.632e-01, 4.832e-02, 1.406e-01, 8.648e-02, 5.580e-02, -1.611e-01, 1.984e-01, -1.431e-01, 1.905e-01, -5.487e-02, -4.867e-02, -1.683e-01) * s0_2_1;
	r2 += M4(1.112e-01, -2.705e-01, -5.303e-02, 1.466e-01, 6.905e-02, 2.857e-01, 8.910e-02, 2.665e-04, -6.664e-02, 1.000e+00, 1.098e-01, -2.360e-01, -7.060e-02, -6.154e-02, 2.448e-02, 5.614e-02) * s0_2_1;
	r0 += M4(-2.027e-02, -2.588e-01, 8.913e-02, -1.749e-01, 2.066e-02, 1.174e-02, -2.558e-02, 5.190e-02, -8.639e-02, 1.342e-01, 1.782e-02, 1.049e-01, 5.028e-02, -2.032e-02, 1.928e-01, -5.844e-02) * s0_2_2;
	r1 += M4(1.269e-02, -2.633e-02, -1.861e-01, 2.484e-01, 9.196e-02, 6.169e-02, 1.541e-02, 1.640e-02, -7.824e-02, 1.392e-01, 4.403e-02, 4.607e-03, 6.576e-02, -1.086e-01, 7.407e-02, 3.940e-02) * s0_2_2;
	r2 += M4(3.731e-02, 4.505e-02, -3.380e-02, 2.493e-01, 2.620e-02, 4.487e-02, 2.159e-02, 2.234e-02, 9.797e-02, 7.631e-02, 4.546e-02, -9.725e-03, 3.917e-02, -2.203e-01, -5.702e-02, 8.007e-02) * s0_2_2;
	r0 += V4(-2.146e-02, -5.026e-02, 4.526e-03, 7.379e-03);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-3.941e-02, -3.429e-02, -1.140e-02, -1.707e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-1.822e-02, -2.728e-02, -2.755e-04, -6.827e-03);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_3x12_DS] -conv3
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv2
//!BIND LUMA
//!SAVE conv3
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
// Pick the arithmetic types for this pass: 16-bit float vector/matrix/scalar
// when the extension macro is defined, otherwise the regular 32-bit types.
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2 read the conv2 texture. The LUMA-sized coordinate pos + (x, y) is
// clamped to [0, sz], multiplied by (3, 1), and offset by 0, 1 or 2 texels in x,
// so each LUMA position maps to three horizontally adjacent conv2 texels.
// The fetched value is scaled by conv2_mul and converted to V4.
// `pos` and `sz` must be in scope at the point of use (they are locals of hook()).
#define l0(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Work-group shared tile: G[k] holds the lk() samples, indexed [row][column].
// 10x10 = the 8x8 positions handled by one work group plus a one-sample border
// on every side (hook() fills it with offsets of -1).
shared V4 G[3][10][10];
// conv3 pass entry point.
// Each invocation handles one LUMA-sized position `pos`:
//   1. the work group cooperatively loads a 10x10 tile of conv2 samples
//      (three V4 values per position) into the shared array G;
//   2. the 3x3 neighbourhood around `pos` is read back from G and every sample
//      is multiplied by a constant 4x4 matrix and accumulated into r0, r1, r2;
//   3. a constant vector is added to each accumulator, the result is clamped to
//      [0, 1] and written to three horizontally adjacent texels of out_image.
// NOTE(review): out_image, conv2_raw, conv2_mul and LUMA_size are not declared in
// this file; presumably they are supplied by the host (mpv) for the bound
// textures and the compute output -- confirm against the mpv documentation.
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Position handled by this invocation; work groups advance in 8x8 steps.
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// Output coordinate: three output texels per position along x.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid coordinate, used as the upper clamp bound in l0/l1/l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared tile. Each invocation writes entries at tile offsets 0 and 8
	// on each axis (skipping indices >= 10). Tile entry [ay][ax] receives the sample
	// at (work-group origin) + (ax - 1, ay - 1), clamped by the fetch macros.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Wait for the whole work group before reading tile entries written by others.
	barrier();
	// sK_R_C = G[K][xy.y + R][xy.x + C]: the 3x3 neighbourhood centred on `pos`
	// (tile index +1 corresponds to `pos` itself because of the -1 load offset).
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// One accumulator per output texel.
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Contributions of the G[0] neighbourhood (s0_*): one constant matrix per
	// (sample, accumulator) pair.
	r0 += M4(-6.909e-02, 8.911e-02, -6.640e-03, 9.144e-02, 3.236e-02, 2.090e-02, 2.590e-02, -1.195e-02, -2.532e-02, 3.517e-02, -4.286e-03, 1.772e-02, -1.689e-02, -3.412e-02, 3.878e-02, -2.706e-03) * s0_0_0;
	r1 += M4(1.630e-01, 1.037e-01, 3.805e-01, -7.093e-02, 2.830e-02, -4.193e-02, -2.657e-02, -3.305e-03, 8.390e-02, 2.437e-02, 8.107e-02, -2.587e-02, 3.794e-02, -2.226e-02, -4.356e-02, 5.968e-03) * s0_0_0;
	r2 += M4(5.026e-02, -1.228e-01, 2.071e-01, 9.283e-02, -1.267e-02, -6.263e-03, 2.039e-02, 6.072e-02, 2.443e-02, -2.194e-04, 9.373e-02, 4.074e-02, -2.205e-02, 6.643e-03, 3.188e-02, 3.643e-02) * s0_0_0;
	r0 += M4(8.870e-03, -8.160e-02, -1.841e-02, -9.937e-02, -4.532e-02, 3.309e-02, 4.907e-02, 7.706e-03, 2.377e-02, 3.764e-02, 1.401e-02, 4.349e-03, -4.022e-02, -1.565e-02, -8.915e-03, 3.565e-03) * s0_0_1;
	r1 += M4(2.851e-02, -1.053e-01, 4.453e-02, -5.109e-02, -5.204e-02, -2.342e-02, 2.583e-01, -5.198e-02, -3.589e-01, -7.940e-02, -3.433e-01, 9.533e-03, -3.489e-01, -5.172e-02, -1.616e-01, -4.280e-02) * s0_0_1;
	r2 += M4(2.827e-02, -9.252e-02, 1.137e-01, 2.085e-02, 2.191e-02, -1.314e-02, -7.436e-02, -1.647e-02, 2.108e-01, -4.009e-02, -1.854e-01, -3.623e-01, -3.020e-02, 3.508e-03, -9.910e-03, -1.257e-01) * s0_0_1;
	r0 += M4(-1.679e-02, -8.927e-03, 4.454e-02, -8.960e-03, -4.389e-02, 6.665e-02, -5.225e-02, 4.675e-02, -7.205e-02, 4.163e-02, 3.671e-02, 4.302e-02, -4.983e-02, 2.985e-03, -2.207e-02, -6.501e-03) * s0_0_2;
	r1 += M4(-1.408e-02, -7.754e-03, -7.501e-04, -2.692e-02, -2.450e-02, -5.516e-02, -1.205e-01, -5.936e-02, -1.183e-02, -4.357e-02, -1.835e-02, 6.722e-02, -4.788e-02, 1.309e-02, -1.487e-02, -5.388e-02) * s0_0_2;
	r2 += M4(3.821e-02, 2.844e-03, -7.871e-03, 5.501e-03, 7.342e-02, -6.189e-03, 6.487e-02, -1.081e-01, 1.266e-01, -1.568e-01, -8.206e-02, -1.485e-01, -1.881e-03, -2.875e-02, -3.950e-02, -1.956e-02) * s0_0_2;
	r0 += M4(1.296e-03, 3.367e-02, -2.645e-02, -3.441e-01, -4.267e-02, 6.817e-02, -8.799e-02, 4.361e-02, 3.046e-02, 1.418e-02, -7.138e-02, -3.177e-02, -2.983e-02, 2.136e-02, 7.886e-03, 1.524e-02) * s0_1_0;
	r1 += M4(5.281e-01, 3.040e-01, -4.326e-01, 1.754e-01, -8.031e-02, 1.146e-01, -3.992e-02, 6.555e-02, -8.516e-02, -4.627e-01, -4.740e-02, 3.033e-02, 5.334e-02, -1.995e-01, -4.965e-02, 7.556e-03) * s0_1_0;
	r2 += M4(1.601e-01, -1.267e-02, 4.465e-01, -1.489e-01, -2.215e-03, -2.899e-02, -5.945e-02, -7.591e-02, 3.131e-02, -5.169e-02, -1.403e-02, 8.579e-03, -2.548e-02, -2.493e-02, 2.658e-02, -7.905e-02) * s0_1_0;
	r0 += M4(2.328e-01, 1.316e-02, -1.213e-02, 2.437e-01, 1.737e-01, -2.097e-01, 1.240e-01, 1.342e-01, -2.773e-01, -2.120e-01, 3.777e-02, -1.062e-01, -6.529e-03, -7.251e-02, -2.136e-01, -6.691e-02) * s0_1_1;
	r1 += M4(-4.209e-01, 6.565e-03, -1.579e-01, -2.924e-03, -2.907e-02, -2.946e-02, -4.111e-01, 3.514e-02, -6.716e-01, 1.611e-03, -3.242e-02, -1.370e-01, -3.958e-01, -9.528e-03, -6.363e-02, -6.619e-02) * s0_1_1;
	r2 += M4(-1.054e-02, 1.841e-01, 2.084e-01, -7.370e-02, -3.241e-02, 2.895e-01, 2.510e-01, 9.244e-02, 2.153e-01, -1.683e-01, -2.631e-01, 2.348e-02, -1.106e-01, -1.214e-01, -3.886e-02, 5.088e-02) * s0_1_1;
	r0 += M4(1.567e-02, 3.808e-02, 2.955e-02, 1.100e-02, -4.557e-02, -9.472e-03, -1.632e-01, -2.822e-01, 1.876e-02, -2.725e-01, -2.046e-01, -2.721e-01, -3.596e-02, -3.817e-02, -2.356e-02, -4.112e-02) * s0_1_2;
	r1 += M4(4.852e-02, -1.568e-02, -2.596e-02, 2.633e-02, 8.227e-02, -2.299e-02, -2.112e-02, 2.752e-02, 8.763e-03, -2.111e-03, -3.702e-03, -1.935e-01, -4.037e-02, 2.044e-02, -1.229e-01, -3.725e-04) * s0_1_2;
	r2 += M4(3.750e-02, 3.430e-02, 2.806e-02, -1.691e-02, 1.756e-01, 4.389e-02, -3.556e-02, 2.852e-02, -8.046e-02, -1.431e-01, -6.561e-02, -3.395e-04, -1.898e-02, -5.677e-02, -2.409e-02, -8.130e-02) * s0_1_2;
	r0 += M4(3.521e-02, -1.411e-01, -2.265e-01, -2.851e-02, 7.742e-02, -3.322e-03, 2.992e-02, -4.895e-02, -2.293e-04, -1.145e-02, -4.597e-02, 8.090e-03, -8.319e-02, -4.555e-02, 8.238e-02, -1.506e-02) * s0_2_0;
	r1 += M4(1.227e-01, 2.818e-03, 5.340e-02, -1.949e-01, -8.668e-02, 8.838e-02, -1.157e-02, -2.813e-02, -2.493e-02, -2.860e-01, -4.473e-03, 2.001e-02, -5.378e-03, -8.884e-05, 3.834e-02, -2.877e-02) * s0_2_0;
	r2 += M4(8.164e-02, -5.181e-02, 1.244e-01, 1.514e-02, 2.276e-02, 1.045e-02, -4.979e-02, -4.219e-03, 1.308e-02, 2.094e-02, -2.101e-02, -2.388e-03, -4.198e-02, -1.152e-02, -2.477e-02, 1.489e-02) * s0_2_0;
	r0 += M4(3.990e-02, 3.141e-03, 2.094e-01, -2.388e-01, 2.301e-03, 4.711e-02, -3.663e-01, -1.518e-01, -8.763e-02, -1.381e-01, -2.378e-01, 6.014e-02, -8.773e-02, -1.250e-02, -1.450e-01, -5.583e-02) * s0_2_1;
	r1 += M4(-4.213e-02, 6.018e-03, 2.249e-02, -1.933e-02, -1.134e-02, -3.070e-01, 3.761e-02, 1.212e-01, -1.337e-01, -5.197e-02, 1.523e-02, 4.404e-03, -6.835e-04, -7.156e-03, 7.397e-02, -1.714e-01) * s0_2_1;
	r2 += M4(4.334e-02, -1.802e-01, 2.757e-02, -1.460e-02, 9.861e-03, -1.971e-01, 2.243e-02, -1.444e-02, 7.418e-02, 3.025e-02, -6.106e-02, 6.825e-03, -2.322e-01, -2.330e-01, 5.336e-02, -5.942e-02) * s0_2_1;
	r0 += M4(-3.685e-02, 2.425e-03, -8.452e-02, -1.777e-02, 1.360e-02, 1.737e-02, -7.103e-02, 2.348e-01, -4.000e-02, 2.472e-02, 5.273e-02, -6.166e-03, -3.331e-02, -7.962e-02, -3.448e-02, -3.082e-02) * s0_2_2;
	r1 += M4(-7.622e-03, -2.149e-02, -2.865e-02, 2.493e-03, 1.931e-02, 8.309e-02, 7.985e-02, 2.830e-02, -6.617e-02, -6.596e-03, 1.731e-02, -2.132e-02, -4.602e-03, 3.269e-02, 7.934e-02, -1.796e-01) * s0_2_2;
	r2 += M4(1.810e-02, -1.739e-02, 3.787e-02, -3.960e-03, 1.665e-02, 8.221e-02, -4.481e-02, 5.124e-02, -1.222e-02, 1.191e-02, -5.879e-02, 9.066e-03, -6.518e-02, -1.547e-01, -5.044e-02, 1.913e-02) * s0_2_2;
	// Contributions of the G[1] neighbourhood (s1_*).
	r0 += M4(3.107e-03, 1.158e-02, 2.144e-02, 1.516e-02, 8.395e-03, 2.253e-02, 8.272e-03, 2.957e-03, -5.485e-02, -5.246e-02, -1.212e-02, -3.607e-02, -1.297e-02, -2.081e-02, 3.332e-03, -2.784e-02) * s1_0_0;
	r1 += M4(5.864e-02, 5.869e-02, -8.635e-02, 7.762e-03, -2.093e-03, -1.408e-02, 4.353e-02, 3.577e-03, 2.063e-03, -6.787e-03, -1.054e-01, 1.191e-02, -1.469e-02, -4.263e-03, -3.767e-01, 2.080e-02) * s1_0_0;
	r2 += M4(2.114e-03, 3.718e-02, 1.758e-02, 1.157e-01, -1.324e-02, 4.671e-03, 1.774e-02, 2.228e-02, -1.798e-02, -1.193e-02, -1.390e-02, -2.447e-02, 1.007e-02, -1.636e-02, 9.735e-04, 1.338e-02) * s1_0_0;
	r0 += M4(4.735e-02, -1.066e-01, 2.175e-02, -7.945e-02, -8.061e-03, 1.437e-03, -6.091e-02, 3.964e-02, -3.956e-02, -9.720e-02, 2.674e-02, -4.554e-02, -5.833e-02, -3.417e-02, -2.432e-02, 9.846e-04) * s1_0_1;
	r1 += M4(-2.324e-02, -6.448e-02, 1.460e-01, 1.378e-01, 2.207e-02, -6.282e-02, 2.996e-01, -5.978e-02, -1.586e-01, 5.066e-02, -4.124e-01, 1.248e-01, -4.322e-02, 9.863e-03, -7.509e-01, -2.908e-02) * s1_0_1;
	r2 += M4(-5.629e-02, 2.875e-02, -2.529e-02, 1.901e-01, 2.089e-03, 7.281e-02, 5.358e-03, 1.380e-01, -7.445e-02, -8.170e-02, -1.674e-01, -2.539e-01, 1.029e-02, 1.365e-02, -6.412e-02, -2.678e-01) * s1_0_1;
	r0 += M4(-3.355e-03, -2.435e-04, 3.959e-02, 2.061e-03, 1.605e-02, -1.660e-02, 4.754e-02, 1.155e-02, 2.086e-02, -1.028e-01, 4.219e-02, -3.529e-02, 4.230e-02, 1.651e-02, 1.423e-02, 3.906e-02) * s1_0_2;
	r1 += M4(2.413e-02, 3.939e-02, -1.330e-02, 4.241e-02, -1.054e-02, 2.836e-02, 2.610e-01, -1.534e-02, -1.652e-02, 3.552e-02, -1.164e-01, 2.442e-02, 1.950e-02, -1.119e-02, -1.255e-01, -4.475e-02) * s1_0_2;
	r2 += M4(-2.003e-02, -2.909e-03, -3.370e-02, 5.844e-03, 1.795e-02, -6.467e-02, -5.534e-02, -5.210e-02, -1.958e-01, -2.377e-02, -1.180e-01, 1.187e-03, 2.460e-02, 1.577e-02, 7.248e-02, -3.579e-02) * s1_0_2;
	r0 += M4(4.475e-02, 1.086e-01, 1.285e-02, 1.138e-01, -1.657e-02, 1.804e-02, 2.791e-02, 5.013e-02, -1.754e-01, -1.182e-01, 9.815e-03, -3.548e-02, -4.760e-02, -4.868e-03, -4.234e-02, 2.948e-02) * s1_1_0;
	r1 += M4(-3.135e-01, 2.417e-01, -1.771e-02, 1.601e-01, 6.702e-03, 8.702e-03, -2.013e-02, 1.293e-02, 2.831e-03, -4.515e-02, -3.470e-02, -1.968e-01, 1.673e-03, -5.140e-01, -1.065e-01, 2.658e-02) * s1_1_0;
	r2 += M4(-9.502e-03, -2.985e-02, -2.884e-01, -3.822e-02, -2.189e-02, 6.262e-02, 1.607e-02, 2.079e-02, 4.244e-03, -8.567e-02, -4.384e-02, -1.392e-02, -7.165e-03, -6.779e-02, 5.826e-03, -5.711e-02) * s1_1_0;
	r0 += M4(-3.189e-03, 2.671e-01, 5.796e-05, 3.062e-01, -7.474e-02, 6.295e-02, -2.538e-02, 8.165e-02, -1.129e-02, -4.106e-01, -2.383e-01, -4.060e-01, -1.826e-01, -1.510e-01, 1.854e-02, -3.556e-01) * s1_1_1;
	r1 += M4(1.179e-01, -1.714e-01, -1.519e-01, -1.307e-02, 2.116e-01, -3.991e-01, -1.013e-01, -8.649e-02, -2.114e-01, 1.127e-02, -8.022e-02, -4.909e-01, -2.870e-01, 4.122e-02, -2.208e-01, -1.909e-02) * s1_1_1;
	r2 += M4(-2.480e-01, -1.323e-01, 5.592e-02, -1.869e-01, -1.157e-01, 1.227e-01, 1.489e-01, -6.991e-02, -1.743e-01, -4.634e-01, -3.623e-01, -5.204e-02, 2.247e-02, -2.183e-01, -2.055e-01, -1.746e-01) * s1_1_1;
	r0 += M4(-3.309e-02, 1.323e-02, 5.824e-02, 7.154e-02, 2.275e-02, -1.165e-01, 9.382e-02, 7.822e-02, 9.758e-02, -5.363e-02, 1.129e-01, 9.188e-02, -6.816e-02, 1.973e-03, -9.055e-03, -1.824e-01) * s1_1_2;
	r1 += M4(-1.049e-02, 5.783e-02, -4.049e-03, -5.899e-02, -1.926e-02, 5.155e-02, -1.382e-01, -5.524e-02, 4.509e-02, 2.471e-02, 9.066e-03, -3.917e-02, 1.087e-01, -1.453e-02, 7.399e-02, 4.216e-02) * s1_1_2;
	r2 += M4(-7.270e-02, 4.267e-02, -8.434e-02, 1.817e-02, -2.035e-01, 3.185e-01, -7.634e-02, 9.057e-02, -3.619e-01, 1.835e-02, -1.039e-01, -5.187e-02, 2.653e-01, -3.155e-01, 7.400e-02, -9.921e-03) * s1_1_2;
	r0 += M4(7.621e-02, -1.336e-01, 1.333e-01, -6.702e-02, 2.088e-02, 3.554e-02, 4.308e-02, 1.013e-02, -7.691e-02, -6.612e-02, -7.727e-02, -1.426e-03, -5.259e-02, -1.284e-01, -7.730e-03, -4.678e-02) * s1_2_0;
	r1 += M4(-1.013e-01, 2.261e-01, -3.380e-02, -1.587e-01, 3.157e-02, -1.694e-01, 3.410e-03, 4.283e-04, -2.706e-02, 3.077e-02, -2.929e-03, -1.303e-02, -2.182e-02, -2.112e-01, -5.065e-03, -1.299e-01) * s1_2_0;
	r2 += M4(4.713e-02, -4.116e-02, -5.199e-02, -2.440e-02, -3.205e-03, 1.063e-02, 4.442e-03, -9.867e-03, -6.507e-03, -5.185e-02, -3.519e-02, -1.962e-02, -2.065e-02, -3.499e-02, -2.412e-02, -1.340e-02) * s1_2_0;
	r0 += M4(-6.821e-02, -1.929e-01, -1.160e-01, -1.353e-01, -2.210e-01, -2.068e-02, -1.685e-01, 3.543e-02, -6.563e-02, 1.019e-02, -4.082e-01, 4.424e-02, -2.716e-01, -1.832e-01, -3.463e-01, -1.564e-02) * s1_2_1;
	r1 += M4(1.106e-01, -9.578e-02, -4.952e-02, -1.025e-01, 2.508e-02, 1.199e-02, 2.159e-02, -2.875e-02, -9.892e-02, -8.131e-02, -8.613e-03, 1.748e-01, -5.881e-02, 1.139e-01, 7.580e-02, -1.349e-01) * s1_2_1;
	r2 += M4(1.219e-02, 1.042e-01, 5.724e-02, 2.411e-02, -2.855e-02, 8.805e-03, 4.169e-02, 4.972e-02, -4.793e-02, -8.169e-02, -1.097e-01, 1.173e-03, -3.051e-02, -6.606e-02, -1.018e-01, 2.147e-02) * s1_2_1;
	r0 += M4(4.700e-02, 6.189e-02, 6.446e-02, -8.295e-02, 1.166e-01, 2.074e-01, 3.486e-01, -5.429e-02, 1.672e-02, -3.458e-02, 1.872e-01, -1.476e-01, 8.407e-04, -1.875e-01, -3.005e-01, -1.213e-01) * s1_2_2;
	r1 += M4(1.268e-04, 2.877e-02, 3.212e-02, 5.844e-02, 1.398e-02, 7.217e-02, -4.137e-02, 1.578e-01, -2.152e-02, 1.474e-04, 2.911e-02, -1.066e-02, 1.573e-02, -2.220e-02, -1.082e-02, -9.173e-02) * s1_2_2;
	r2 += M4(4.988e-04, -6.838e-02, 4.784e-03, -1.594e-02, -5.439e-02, -4.115e-02, 1.556e-02, -5.603e-02, -6.197e-02, -1.314e-01, -5.996e-02, 6.147e-04, 1.028e-01, -1.246e-01, 4.638e-04, -2.668e-02) * s1_2_2;
	// The s0_* variables are reused here for the G[2] neighbourhood.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(5.335e-02, -5.143e-02, -4.690e-02, -6.546e-03, 1.966e-02, -2.873e-02, 1.410e-02, -8.922e-03, 1.411e-02, 3.253e-02, -8.028e-03, 2.134e-02, 1.213e-02, 1.893e-02, 2.654e-02, -3.222e-03) * s0_0_0;
	r1 += M4(-1.104e-02, 8.098e-02, -4.352e-02, 4.010e-02, 1.194e-02, -1.718e-01, 1.027e-01, 4.948e-03, -3.360e-02, -6.466e-02, 5.575e-02, -5.926e-02, -9.364e-02, 3.932e-02, -1.096e-01, -3.537e-02) * s0_0_0;
	r2 += M4(3.328e-02, 1.282e-02, -1.133e-02, -5.905e-02, 5.496e-03, -9.852e-03, 4.512e-02, 2.719e-02, 3.083e-02, 7.357e-03, 3.639e-03, 8.608e-03, 1.188e-02, -6.113e-03, -3.101e-02, 1.611e-03) * s0_0_0;
	r0 += M4(1.368e-01, -1.485e-01, -3.155e-02, -8.518e-02, -1.271e-01, -2.419e-02, -4.154e-02, -2.822e-02, -3.432e-02, 9.719e-02, 1.473e-02, 4.513e-02, 6.269e-03, -2.387e-02, -7.036e-03, 1.986e-02) * s0_0_1;
	r1 += M4(1.628e-01, -3.920e-03, 2.011e-02, -9.797e-02, -2.587e-01, -1.213e-01, -2.415e-01, 8.190e-02, 3.969e-03, 3.329e-02, 3.096e-01, -5.315e-02, 3.705e-02, -3.432e-02, 9.354e-02, -3.716e-02) * s0_0_1;
	r2 += M4(-7.512e-03, 5.029e-02, 2.155e-01, 2.941e-01, 1.063e-02, -4.805e-02, -1.946e-01, -5.723e-01, 8.794e-02, 2.777e-02, 5.489e-02, 1.157e-01, 8.845e-03, 5.264e-02, 3.274e-02, -7.783e-02) * s0_0_1;
	r0 += M4(3.844e-02, -3.256e-02, 5.018e-02, -1.896e-02, -1.942e-01, -1.729e-01, -7.190e-02, -1.088e-01, 2.796e-02, -2.758e-02, -3.001e-02, -2.889e-03, 1.632e-02, 2.463e-03, 7.973e-03, 3.330e-02) * s0_0_2;
	r1 += M4(7.497e-02, 5.606e-02, -1.043e-01, -2.441e-02, -5.492e-03, 4.213e-02, 6.905e-02, 3.399e-02, -1.387e-02, -1.182e-02, -1.603e-04, 2.858e-02, -1.746e-02, 3.094e-02, 5.922e-02, -7.397e-02) * s0_0_2;
	r2 += M4(1.553e-02, 8.300e-02, 3.327e-02, -7.542e-02, 2.684e-03, -9.249e-02, -1.535e-01, -1.227e-01, 4.256e-02, 4.263e-02, -9.907e-03, 6.812e-02, 2.343e-03, 4.584e-02, -6.779e-03, 5.235e-02) * s0_0_2;
	r0 += M4(6.354e-02, -4.274e-02, 7.263e-02, 1.247e-03, 4.919e-02, -2.391e-02, 9.516e-02, -2.121e-02, 3.260e-02, 1.437e-01, 2.479e-02, 6.416e-02, 5.629e-02, 1.246e-02, -1.224e-02, 1.391e-02) * s0_1_0;
	r1 += M4(2.548e-02, 7.246e-01, 1.151e-01, -1.234e-01, -5.769e-03, -3.594e-01, -3.112e-03, -6.318e-02, 1.314e-02, -7.892e-02, 2.328e-01, -2.720e-02, -1.373e-01, 1.073e-02, 1.003e-01, -1.558e-02) * s0_1_0;
	r2 += M4(8.070e-02, 2.099e-02, 4.746e-02, -1.100e-02, 4.582e-02, 1.918e-02, -3.294e-02, 3.267e-02, 6.812e-02, 3.919e-02, 1.302e-02, 7.691e-02, 3.657e-02, 3.164e-02, -1.870e-01, -2.953e-02) * s0_1_0;
	r0 += M4(7.151e-01, 3.604e-01, 3.218e-02, 2.914e-01, -2.285e-01, -3.292e-01, -3.583e-01, -3.799e-01, -5.580e-02, 2.822e-01, -5.014e-02, 4.945e-02, -1.978e-02, 1.895e-01, 1.243e-01, -4.679e-02) * s0_1_1;
	r1 += M4(1.540e-02, -1.867e-01, 7.202e-02, 2.453e-01, -1.934e-01, -1.791e-01, -1.016e-02, -3.498e-01, 8.724e-02, -7.108e-02, -2.188e-01, -7.628e-02, -6.891e-02, -4.464e-02, 1.743e-01, 1.734e-01) * s0_1_1;
	r2 += M4(3.660e-01, 8.553e-02, 9.361e-02, -2.085e-01, 1.392e-01, 1.438e-02, -1.724e-01, 7.701e-02, 1.337e-01, 1.362e-02, 1.619e-01, 1.426e-01, 5.945e-03, 2.832e-02, 6.412e-02, 5.020e-01) * s0_1_1;
	r0 += M4(-6.665e-02, 1.843e-01, 1.958e-01, 7.444e-02, -2.593e-01, -3.037e-01, -3.933e-01, -6.543e-01, -8.622e-02, 7.047e-03, -1.950e-02, 1.528e-01, -1.025e-01, -2.350e-02, -7.056e-02, 1.280e-02) * s0_1_2;
	r1 += M4(1.194e-01, 1.851e-02, 3.668e-02, 1.509e-01, 1.258e-02, -1.474e-02, -5.737e-02, -2.087e-01, -8.064e-02, 5.093e-02, 6.302e-02, -2.300e-02, 5.868e-03, 1.511e-02, -5.614e-02, 7.300e-02) * s0_1_2;
	r2 += M4(-7.053e-02, -6.214e-02, 1.333e-01, 1.097e-02, 8.670e-02, -4.121e-02, 8.180e-03, 3.993e-02, 2.712e-02, 7.782e-02, -6.101e-02, -3.039e-02, -1.738e-02, 9.697e-02, -7.140e-02, 7.941e-02) * s0_1_2;
	r0 += M4(-2.952e-03, 1.844e-02, -1.291e-01, -1.631e-02, 1.671e-03, 1.009e-02, 4.629e-02, 3.308e-02, -2.599e-03, 1.759e-01, -9.646e-02, 4.226e-02, 1.067e-01, -7.635e-02, 1.187e-01, 4.906e-02) * s0_2_0;
	r1 += M4(1.304e-01, 7.245e-02, -4.021e-02, 9.207e-03, -2.545e-02, -1.137e-01, 1.140e-03, 2.126e-02, 2.719e-02, -3.820e-03, 5.551e-02, 1.434e-01, 2.532e-03, 2.200e-02, -1.390e-02, -7.595e-02) * s0_2_0;
	r2 += M4(3.364e-02, -5.349e-03, 1.230e-01, 1.531e-02, 3.266e-02, 1.528e-02, -4.053e-02, -9.122e-03, 2.242e-02, 7.200e-02, 1.341e-02, 2.942e-02, -6.568e-02, 1.632e-01, -1.320e-01, 2.411e-02) * s0_2_0;
	r0 += M4(1.890e-01, -1.067e-01, 6.464e-01, -1.401e-01, -5.793e-02, 2.871e-02, -1.213e-01, 6.391e-02, 1.129e-01, 5.692e-01, 1.639e-01, 2.561e-01, -1.162e-01, 7.466e-02, -2.957e-03, 5.274e-01) * s0_2_1;
	r1 += M4(9.584e-02, 1.519e-01, -2.213e-02, -1.519e-01, -2.557e-02, -1.962e-02, 5.213e-02, -2.527e-02, -1.094e-01, -8.659e-02, 5.219e-02, 4.906e-01, -5.730e-01, -1.337e-01, -5.864e-03, 2.420e-01) * s0_2_1;
	r2 += M4(4.163e-02, -1.056e-01, 5.722e-02, 6.073e-03, 1.581e-02, 5.847e-02, 3.162e-02, 1.031e-02, -2.390e-01, -3.316e-02, -5.402e-02, 2.953e-02, -3.564e-01, 5.208e-01, -5.339e-02, -7.404e-02) * s0_2_1;
	r0 += M4(5.530e-02, -2.369e-01, -1.968e-01, -8.618e-02, -4.296e-02, 2.485e-02, -3.936e-01, 9.204e-02, -8.494e-02, 2.336e-01, -1.087e-01, 5.598e-03, -8.568e-02, 7.020e-02, -1.616e-01, 2.368e-01) * s0_2_2;
	r1 += M4(-3.149e-02, -5.134e-02, -3.233e-02, -1.454e-01, -3.389e-02, 2.355e-02, 1.579e-02, 5.233e-02, 7.735e-02, 2.434e-03, -3.265e-02, 4.653e-02, 3.432e-02, -1.331e-03, -4.002e-02, 5.663e-02) * s0_2_2;
	r2 += M4(1.284e-01, -4.565e-02, 3.835e-02, -7.885e-03, 2.415e-02, 4.559e-02, -3.712e-02, 6.807e-03, 3.399e-02, 6.275e-02, 4.810e-02, -1.470e-02, -5.020e-02, 1.486e-01, -1.254e-01, 1.347e-02) * s0_2_2;
	// Finish each accumulator: add its constant vector, clamp to [0, 1], and store
	// it (widened to vec4) at output texel opos + (k, 0) for k = 0, 1, 2.
	r0 += V4(-1.064e-02, -2.091e-02, -8.879e-03, -1.104e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-8.137e-03, -1.143e-02, -1.886e-02, -1.138e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(4.187e-03, -3.746e-04, -2.149e-03, -8.940e-03);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_3x12_DS] -out-shuffle
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv3
//!BIND LUMA
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 1
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
// Pick the arithmetic types for this pass: 16-bit float vector/matrix/scalar
// when the extension macro is defined, otherwise the regular 32-bit types.
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2 read the conv3 texture. The LUMA-sized coordinate pos + (x, y) is
// clamped to [0, sz], multiplied by (3, 1), and offset by 0, 1 or 2 texels in x,
// so each LUMA position maps to three horizontally adjacent conv3 texels.
// The fetched value is scaled by conv3_mul and converted to V4.
// `pos` and `sz` must be in scope at the point of use (they are locals of hook()).
#define l0(x, y) V4((conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Work-group shared tile: G[k] holds the lk() samples, indexed [row][column].
// 10x10 = the 8x8 positions handled by one work group plus a one-sample border
// on every side (hook() fills it with offsets of -1).
shared V4 G[3][10][10];
// Out-shuffle pass entry point.
//
// Each invocation handles one position `pos` and writes a 2x2 quad of output
// texels at pos * 2. Steps:
//   1. Cooperatively fill the shared tile G (3 planes of 10x10) from conv3.
//   2. After the barrier, accumulate a 3x3 neighbourhood from each of the
//      three planes (27 matrix * vector products) plus a constant offset.
//   3. Add the four accumulated components to LUMA sampled at the four output
//      texel centres and store the sums in the red channel of out_image.
//
// The accumulation order is kept fixed on purpose: floating-point sums
// (16-bit when V4 is f16vec4) depend on the order of the additions.
void hook() {
	ivec2 lid = ivec2(gl_LocalInvocationID.xy);
	// NOTE: `pos` and `sz` are captured by name inside the l0/l1/l2 macros,
	// so these two identifiers must not be renamed.
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + lid;
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	ivec2 quad = pos * ivec2(2, 2);

	// Tile fill. Every invocation writes the entry at its own local id; the
	// second step (offset 8) only runs where local id + 8 is still below 10,
	// which covers rows/columns 8 and 9 of the 10x10 tile.
	// Presumably the workgroup is 8x8 invocations, matching the ivec2(8, 8)
	// stride above; confirm against the COMPUTE directive of this pass.
	for (int dy = 0; dy < 10 && lid.y + dy < 10; dy += 8) {
		int row = lid.y + dy;
		for (int dx = 0; dx < 10 && lid.x + dx < 10; dx += 8) {
			int col = lid.x + dx;
			// Tile entry (row, col) holds the texel at pos + (dx - 1, dy - 1).
			G[0][row][col] = l0(dx - 1, dy - 1);
			G[1][row][col] = l1(dx - 1, dy - 1);
			G[2][row][col] = l2(dx - 1, dy - 1);
		}
	}
	// All tile writes must be complete before any invocation reads neighbours.
	barrier();

	V4 r0 = V4(0.0);

	// Plane 0, 3x3 neighbourhood (row-major).
	r0 += M4(-1.997e-02, 1.593e-02, -2.142e-02, 7.622e-04, -3.536e-02, 2.103e-03, 1.303e-02, -1.799e-03, -7.402e-02, 6.540e-03, -3.651e-02, 5.539e-03, 1.983e-02, 6.777e-03, -3.650e-03, 4.805e-03) * G[0][lid.y + 0][lid.x + 0];
	r0 += M4(7.902e-02, -1.173e-01, -2.952e-03, 2.966e-02, -1.938e-01, -1.946e-01, -6.755e-03, -6.502e-03, 3.750e-01, -2.842e-01, 1.714e-01, -1.670e-01, -3.480e-02, 1.401e-01, -7.627e-03, 2.983e-02) * G[0][lid.y + 0][lid.x + 1];
	r0 += M4(-9.827e-06, 8.648e-02, 5.681e-04, 1.421e-02, 7.311e-03, -3.528e-02, -2.757e-03, -3.441e-03, -2.976e-02, -6.379e-02, -1.534e-02, -3.162e-02, 1.302e-02, -6.079e-03, 3.418e-04, 8.850e-04) * G[0][lid.y + 0][lid.x + 2];
	r0 += M4(-3.943e-02, 3.901e-03, -2.970e-02, 1.213e-02, 4.213e-02, -9.102e-03, 1.316e-02, -1.873e-02, -1.531e-02, -6.819e-03, -4.968e-02, 3.036e-03, 6.084e-02, -1.935e-02, -7.472e-02, 2.745e-04) * G[0][lid.y + 1][lid.x + 0];
	r0 += M4(-7.007e-02, -1.478e-01, 1.102e-01, -3.643e-01, 2.456e-01, 1.712e-01, -1.057e-01, 7.243e-02, 9.262e-03, 7.300e-02, 1.632e-01, 3.768e-02, 6.763e-02, 3.095e-01, -1.974e-02, -6.307e-01) * G[0][lid.y + 1][lid.x + 1];
	r0 += M4(1.968e-02, 1.837e-01, 9.659e-03, 2.056e-01, -2.334e-02, 4.368e-02, 1.104e-02, -1.860e-02, -1.091e-02, -3.694e-03, -2.213e-02, -2.179e-02, 2.128e-03, 4.726e-03, -6.886e-03, 3.059e-03) * G[0][lid.y + 1][lid.x + 2];
	r0 += M4(-1.910e-03, -3.374e-03, -1.663e-02, -3.118e-04, -7.201e-03, -2.534e-03, 2.877e-03, -3.363e-03, -3.070e-03, 6.711e-03, -1.279e-02, 9.499e-03, -3.176e-03, -9.044e-04, 2.521e-02, 7.151e-03) * G[0][lid.y + 2][lid.x + 0];
	r0 += M4(8.323e-03, -1.064e-02, -5.213e-02, -2.899e-03, -4.474e-03, -3.807e-03, 9.883e-02, 4.822e-02, -1.508e-03, -6.790e-03, 1.486e-03, -1.581e-02, 4.014e-03, -1.972e-03, 4.285e-02, 7.056e-02) * G[0][lid.y + 2][lid.x + 1];
	r0 += M4(5.893e-03, -2.701e-03, 1.565e-02, 3.970e-02, -1.924e-03, -7.485e-04, -1.496e-02, 2.259e-02, 4.866e-04, -4.067e-03, -3.708e-03, -9.145e-03, -3.362e-05, -3.369e-03, -1.659e-03, 6.882e-03) * G[0][lid.y + 2][lid.x + 2];

	// Plane 1, 3x3 neighbourhood (row-major).
	r0 += M4(-1.960e-04, 5.872e-03, 7.844e-03, -4.121e-04, 6.116e-04, 2.624e-03, -1.555e-03, 1.646e-04, -5.790e-03, -4.048e-03, -7.185e-05, -2.863e-04, 4.604e-03, 4.096e-03, -1.733e-03, -4.615e-03) * G[1][lid.y + 0][lid.x + 0];
	r0 += M4(-3.610e-02, -3.994e-03, -8.747e-03, -4.655e-03, 2.051e-02, -2.021e-02, 2.228e-02, -4.602e-03, 4.379e-03, 4.941e-03, 4.027e-03, 3.186e-03, -9.781e-02, -7.371e-02, -5.962e-02, -1.819e-02) * G[1][lid.y + 0][lid.x + 1];
	r0 += M4(-2.788e-02, 2.187e-02, -4.971e-03, 2.069e-02, -2.280e-01, 1.610e-01, 3.266e-04, 6.641e-04, -2.020e-03, -7.089e-04, 3.127e-03, 2.448e-03, 3.046e-02, -3.403e-03, 2.073e-02, 5.470e-03) * G[1][lid.y + 0][lid.x + 2];
	r0 += M4(-8.911e-02, -2.936e-03, -4.901e-02, 3.868e-03, 3.227e-03, 4.200e-03, 4.224e-03, 3.722e-03, -1.486e-02, -4.009e-05, -2.495e-03, 4.505e-03, -3.212e-02, -4.170e-03, -1.027e-02, 3.936e-03) * G[1][lid.y + 1][lid.x + 0];
	r0 += M4(4.320e-01, -1.701e-01, 2.161e-01, -8.004e-02, 1.988e-02, -8.646e-03, 2.634e-02, -1.403e-02, -1.947e-02, -2.643e-02, -3.308e-02, -1.529e-02, -1.929e-01, -1.002e-01, 4.013e-01, 1.489e-01) * G[1][lid.y + 1][lid.x + 1];
	r0 += M4(3.786e-02, -1.644e-01, -2.061e-03, -7.494e-02, -9.516e-02, 6.127e-02, -3.860e-01, 3.154e-01, -1.994e-02, -3.162e-02, -4.892e-03, -1.594e-02, -1.864e-02, -7.866e-02, -3.749e-02, 9.448e-02) * G[1][lid.y + 1][lid.x + 2];
	r0 += M4(-7.443e-04, -5.879e-04, -4.339e-02, -1.307e-03, -7.464e-07, 1.094e-03, 9.849e-04, 4.206e-03, 1.530e-02, -1.216e-02, -4.662e-03, 2.543e-04, 5.228e-03, 5.050e-03, -3.274e-03, 7.251e-03) * G[1][lid.y + 2][lid.x + 0];
	r0 += M4(-3.684e-02, -6.909e-03, 1.322e-01, -4.979e-02, 4.264e-03, 1.835e-03, 3.456e-03, -4.366e-03, -3.037e-01, -1.302e-02, 3.096e-01, 7.007e-02, -1.986e-03, -3.014e-03, -9.684e-02, -4.846e-02) * G[1][lid.y + 2][lid.x + 1];
	r0 += M4(-7.767e-03, 4.328e-02, 7.981e-03, -4.539e-02, -2.176e-03, 2.009e-02, 2.741e-02, -3.035e-03, 2.188e-02, -1.078e-01, -2.000e-02, 5.295e-02, -2.538e-03, -5.671e-03, 6.670e-05, -3.864e-02) * G[1][lid.y + 2][lid.x + 2];

	// Plane 2, 3x3 neighbourhood (row-major).
	r0 += M4(-1.060e-02, -2.417e-03, -3.369e-03, 1.480e-02, 5.994e-02, -5.429e-03, 2.402e-03, 2.703e-03, 8.873e-03, -2.112e-03, -1.887e-03, 2.919e-03, 4.741e-04, -1.312e-03, 1.228e-03, 4.626e-03) * G[2][lid.y + 0][lid.x + 0];
	r0 += M4(-2.710e-03, -2.305e-03, -4.674e-02, -4.219e-02, 9.789e-02, 1.613e-01, 2.820e-03, -1.824e-04, 6.007e-02, 1.435e-02, 1.656e-02, 8.830e-03, -1.302e-02, -2.279e-03, -8.817e-03, -8.452e-03) * G[2][lid.y + 0][lid.x + 1];
	r0 += M4(-6.971e-03, -1.478e-02, 6.121e-03, -1.394e-02, -1.154e-02, -3.490e-04, -8.617e-04, -6.654e-03, -1.631e-02, -6.873e-03, 2.901e-03, 9.940e-03, 7.430e-03, -1.870e-03, 4.319e-03, 1.705e-03) * G[2][lid.y + 0][lid.x + 2];
	r0 += M4(-4.503e-03, -4.896e-02, -5.193e-03, -4.382e-02, -8.422e-02, 3.081e-02, 4.278e-02, -2.068e-02, 1.091e-01, 2.391e-04, 6.067e-02, -5.288e-03, -5.053e-04, 1.032e-02, -8.952e-03, 5.276e-04) * G[2][lid.y + 1][lid.x + 0];
	r0 += M4(1.380e-01, 1.265e-01, 1.372e-01, 1.228e-01, -1.409e-01, -3.525e-01, 1.968e-01, 3.135e-01, -2.881e-01, 2.308e-01, -1.155e-01, 1.105e-01, 2.600e-01, -1.081e-04, 4.038e-02, -2.889e-02) * G[2][lid.y + 1][lid.x + 1];
	r0 += M4(-2.483e-02, 1.872e-02, -2.569e-02, 2.053e-02, -7.188e-03, 2.944e-02, -1.394e-02, 7.372e-03, 1.913e-02, -1.840e-01, -1.666e-02, -1.460e-01, -1.489e-02, 6.679e-03, -3.915e-03, -3.907e-03) * G[2][lid.y + 1][lid.x + 2];
	r0 += M4(-6.747e-03, 1.047e-02, -1.259e-02, -1.197e-02, 5.572e-03, 7.804e-04, -2.943e-02, -8.698e-03, 1.529e-03, 3.779e-03, 5.330e-02, 3.234e-03, -1.139e-02, 6.493e-03, 1.925e-02, -6.306e-03) * G[2][lid.y + 2][lid.x + 0];
	r0 += M4(-4.016e-02, -3.574e-02, 8.974e-04, 4.319e-03, -9.350e-03, 4.734e-03, -6.455e-02, -9.546e-02, 3.710e-02, -1.051e-03, -5.462e-02, 6.518e-02, 8.670e-02, -4.548e-02, -5.918e-01, 1.432e-01) * G[2][lid.y + 2][lid.x + 1];
	r0 += M4(2.316e-03, -1.466e-02, -9.226e-03, -1.570e-02, 2.743e-03, 1.230e-03, -1.084e-03, -9.783e-03, -8.812e-03, -6.144e-03, 6.925e-03, -5.774e-02, -5.707e-04, -6.470e-02, 8.674e-03, -1.456e-02) * G[2][lid.y + 2][lid.x + 2];

	// Constant per-component offset; r0 is used as-is (no clamp) below.
	r0 += V4(9.114e-10, 2.263e-11, 1.001e-09, 2.503e-11);

	// Sampling step is half of LUMA_pt in each axis; `centre` is the centre
	// of the quad's top-left output texel expressed in that step.
	vec2 halfPt = 0.5 * LUMA_pt;
	vec2 centre = (vec2(quad) + vec2(0.5)) * halfPt;
	float base00 = LUMA_tex(centre + vec2(0.0, 0.0) * halfPt).r;
	float base10 = LUMA_tex(centre + vec2(1.0, 0.0) * halfPt).r;
	float base01 = LUMA_tex(centre + vec2(0.0, 1.0) * halfPt).r;
	float base11 = LUMA_tex(centre + vec2(1.0, 1.0) * halfPt).r;

	// r0.xyzw map to quad offsets (0,0), (1,0), (0,1), (1,1) respectively.
	imageStore(out_image, quad + ivec2(0, 0), vec4(r0.x + base00, 0.0, 0.0, 1.0));
	imageStore(out_image, quad + ivec2(1, 0), vec4(r0.y + base10, 0.0, 0.0, 1.0));
	imageStore(out_image, quad + ivec2(0, 1), vec4(r0.z + base01, 0.0, 0.0, 1.0));
	imageStore(out_image, quad + ivec2(1, 1), vec4(r0.w + base11, 0.0, 0.0, 1.0));
}
